1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 /*
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
29 * All Rights Reserved.
30 *
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
36 *
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 *
41 * Carnegie Mellon requests users of this software to return to
42 *
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
47 *
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
50 */
51 /*
52 */
53 /*
54 * File: vm_fault.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 *
57 * Page fault handling module.
58 */
59
60 #include <mach_cluster_stats.h>
61 #include <mach_pagemap.h>
62 #include <mach_kdb.h>
63
64 #include <mach/mach_types.h>
65 #include <mach/kern_return.h>
66 #include <mach/message.h> /* for error codes */
67 #include <mach/vm_param.h>
68 #include <mach/vm_behavior.h>
69 #include <mach/memory_object.h>
70 /* For memory_object_data_{request,unlock} */
71
72 #include <kern/kern_types.h>
73 #include <kern/host_statistics.h>
74 #include <kern/counters.h>
75 #include <kern/task.h>
76 #include <kern/thread.h>
77 #include <kern/sched_prim.h>
78 #include <kern/host.h>
79 #include <kern/xpr.h>
80 #include <kern/mach_param.h>
81 #include <kern/macro_help.h>
82 #include <kern/zalloc.h>
83 #include <kern/misc_protos.h>
84
85 #include <ppc/proc_reg.h>
86
87 #include <vm/vm_fault.h>
88 #include <vm/task_working_set.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_kern.h>
93 #include <vm/pmap.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_protos.h>
96
97 #include <sys/kdebug.h>
98
99 #define VM_FAULT_CLASSIFY 0
100 #define VM_FAULT_STATIC_CONFIG 1
101
102 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
103
104 unsigned int vm_object_absent_max = 50;
105
106 int vm_fault_debug = 0;
107
108 #if !VM_FAULT_STATIC_CONFIG
109 boolean_t vm_fault_dirty_handling = FALSE;
110 boolean_t vm_fault_interruptible = FALSE;
111 boolean_t software_reference_bits = TRUE;
112 #endif
113
114 #if MACH_KDB
115 extern struct db_watchpoint *db_watchpoint_list;
116 #endif /* MACH_KDB */
117
118
119 /* Forward declarations of internal routines. */
120 extern kern_return_t vm_fault_wire_fast(
121 vm_map_t map,
122 vm_map_offset_t va,
123 vm_map_entry_t entry,
124 pmap_t pmap,
125 vm_map_offset_t pmap_addr);
126
127 extern void vm_fault_continue(void);
128
129 extern void vm_fault_copy_cleanup(
130 vm_page_t page,
131 vm_page_t top_page);
132
133 extern void vm_fault_copy_dst_cleanup(
134 vm_page_t page);
135
136 #if VM_FAULT_CLASSIFY
137 extern void vm_fault_classify(vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t fault_type);
140
141 extern void vm_fault_classify_init(void);
142 #endif
143
144 /*
145 * Routine: vm_fault_init
146 * Purpose:
147 * Initialize our private data structures.
148 */
149 void
150 vm_fault_init(void)
151 {
152 }
153
154 /*
155 * Routine: vm_fault_cleanup
156 * Purpose:
157 * Clean up the result of vm_fault_page.
158 * Results:
159 * The paging reference for "object" is released.
160 * "object" is unlocked.
161 * If "top_page" is not null, "top_page" is
162 * freed and the paging reference for the object
163 * containing it is released.
164 *
165 * In/out conditions:
166 * "object" must be locked.
167 */
168 void
169 vm_fault_cleanup(
170 register vm_object_t object,
171 register vm_page_t top_page)
172 {
173 vm_object_paging_end(object);
174 vm_object_unlock(object);
175
176 if (top_page != VM_PAGE_NULL) {
177 object = top_page->object;
178 vm_object_lock(object);
179 VM_PAGE_FREE(top_page);
180 vm_object_paging_end(object);
181 vm_object_unlock(object);
182 }
183 }
184
185 #if MACH_CLUSTER_STATS
186 #define MAXCLUSTERPAGES 16
187 struct {
188 unsigned long pages_in_cluster;
189 unsigned long pages_at_higher_offsets;
190 unsigned long pages_at_lower_offsets;
191 } cluster_stats_in[MAXCLUSTERPAGES];
192 #define CLUSTER_STAT(clause) clause
193 #define CLUSTER_STAT_HIGHER(x) \
194 ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
195 #define CLUSTER_STAT_LOWER(x) \
196 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
197 #define CLUSTER_STAT_CLUSTER(x) \
198 ((cluster_stats_in[(x)].pages_in_cluster)++)
199 #else /* MACH_CLUSTER_STATS */
200 #define CLUSTER_STAT(clause)
201 #endif /* MACH_CLUSTER_STATS */
202
203 /* XXX - temporary */
204 boolean_t vm_allow_clustered_pagein = FALSE;
205 int vm_pagein_cluster_used = 0;
206
207 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
208
209
210 boolean_t vm_page_deactivate_behind = TRUE;
211 /*
212 * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
213 */
214 int vm_default_ahead = 0;
215 int vm_default_behind = MAX_UPL_TRANSFER;
216
217 /*
 218  * vm_fault_deactivate_behind
219 *
220 * Determine if sequential access is in progress
221 * in accordance with the behavior specified. If
 222  * so, compute a potential page to deactivate and
223 * deactivate it.
224 *
225 * The object must be locked.
226 */
227 static
228 boolean_t
229 vm_fault_deactivate_behind(
230 vm_object_t object,
231 vm_object_offset_t offset,
232 vm_behavior_t behavior)
233 {
234 vm_page_t m;
235
236 #if TRACEFAULTPAGE
237 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
238 #endif
239
240 if (object == kernel_object) {
241 /*
242 * Do not deactivate pages from the kernel object: they
243 * are not intended to become pageable.
244 */
245 return FALSE;
246 }
247
248 switch (behavior) {
249 case VM_BEHAVIOR_RANDOM:
250 object->sequential = PAGE_SIZE_64;
251 m = VM_PAGE_NULL;
252 break;
253 case VM_BEHAVIOR_SEQUENTIAL:
254 if (offset &&
255 object->last_alloc == offset - PAGE_SIZE_64) {
256 object->sequential += PAGE_SIZE_64;
257 m = vm_page_lookup(object, offset - PAGE_SIZE_64);
258 } else {
259 object->sequential = PAGE_SIZE_64; /* reset */
260 m = VM_PAGE_NULL;
261 }
262 break;
263 case VM_BEHAVIOR_RSEQNTL:
264 if (object->last_alloc &&
265 object->last_alloc == offset + PAGE_SIZE_64) {
266 object->sequential += PAGE_SIZE_64;
267 m = vm_page_lookup(object, offset + PAGE_SIZE_64);
268 } else {
269 object->sequential = PAGE_SIZE_64; /* reset */
270 m = VM_PAGE_NULL;
271 }
272 break;
273 case VM_BEHAVIOR_DEFAULT:
274 default:
275 if (offset &&
276 object->last_alloc == offset - PAGE_SIZE_64) {
277 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
278
279 object->sequential += PAGE_SIZE_64;
280 m = (offset >= behind &&
281 object->sequential >= behind) ?
282 vm_page_lookup(object, offset - behind) :
283 VM_PAGE_NULL;
284 } else if (object->last_alloc &&
285 object->last_alloc == offset + PAGE_SIZE_64) {
286 vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
287
288 object->sequential += PAGE_SIZE_64;
289 m = (offset < -behind &&
290 object->sequential >= behind) ?
291 vm_page_lookup(object, offset + behind) :
292 VM_PAGE_NULL;
293 } else {
294 object->sequential = PAGE_SIZE_64;
295 m = VM_PAGE_NULL;
296 }
297 break;
298 }
299
300 object->last_alloc = offset;
301
302 if (m) {
303 if (!m->busy) {
304 vm_page_lock_queues();
305 vm_page_deactivate(m);
306 vm_page_unlock_queues();
307 #if TRACEFAULTPAGE
308 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
309 #endif
310 }
311 return TRUE;
312 }
313 return FALSE;
314 }
315
316
317 /*
318 * Routine: vm_fault_page
319 * Purpose:
320 * Find the resident page for the virtual memory
321 * specified by the given virtual memory object
322 * and offset.
323 * Additional arguments:
 324  *              The required permissions for the page are given
325 * in "fault_type". Desired permissions are included
326 * in "protection". The minimum and maximum valid offsets
327 * within the object for the relevant map entry are
328 * passed in "lo_offset" and "hi_offset" respectively and
329 * the expected page reference pattern is passed in "behavior".
330 * These three parameters are used to determine pagein cluster
331 * limits.
332 *
333 * If the desired page is known to be resident (for
334 * example, because it was previously wired down), asserting
 335  *              the "must_be_resident" parameter will speed the search.
336 *
337 * If the operation can be interrupted (by thread_abort
338 * or thread_terminate), then the "interruptible"
339 * parameter should be asserted.
340 *
341 * Results:
342 * The page containing the proper data is returned
343 * in "result_page".
344 *
345 * In/out conditions:
346 * The source object must be locked and referenced,
347 * and must donate one paging reference. The reference
348 * is not affected. The paging reference and lock are
349 * consumed.
350 *
351 * If the call succeeds, the object in which "result_page"
352 * resides is left locked and holding a paging reference.
353 * If this is not the original object, a busy page in the
354 * original object is returned in "top_page", to prevent other
355 * callers from pursuing this same data, along with a paging
356 * reference for the original object. The "top_page" should
357 * be destroyed when this guarantee is no longer required.
358 * The "result_page" is also left busy. It is not removed
359 * from the pageout queues.
360 */
361
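/*
 * Illustrative caller pattern (not an actual call site in this file;
 * the local variable names are hypothetical).  Per the contract above,
 * the caller locks the object and donates a paging reference before
 * calling, and is responsible for the busy result_page and top_page
 * afterwards:
 *
 *	vm_object_lock(object);
 *	vm_object_paging_begin(object);
 *	kr = vm_fault_page(object, offset, fault_type, FALSE,
 *			   interruptible, lo_offset, hi_offset, behavior,
 *			   &prot, &result_page, &top_page, &type_of_fault,
 *			   &error_code, FALSE, FALSE, map, vaddr);
 *	if (kr == VM_FAULT_SUCCESS) {
 *		... use result_page, then PAGE_WAKEUP_DONE(result_page) ...
 *		... release top_page and the paging references, as
 *		    vm_fault_cleanup() does ...
 *	}
 */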
362 vm_fault_return_t
363 vm_fault_page(
364 /* Arguments: */
365 vm_object_t first_object, /* Object to begin search */
366 vm_object_offset_t first_offset, /* Offset into object */
367 vm_prot_t fault_type, /* What access is requested */
368 boolean_t must_be_resident,/* Must page be resident? */
369 int interruptible, /* how may fault be interrupted? */
370 vm_map_offset_t lo_offset, /* Map entry start */
371 vm_map_offset_t hi_offset, /* Map entry end */
372 vm_behavior_t behavior, /* Page reference behavior */
373 /* Modifies in place: */
374 vm_prot_t *protection, /* Protection for mapping */
375 /* Returns: */
376 vm_page_t *result_page, /* Page found, if successful */
377 vm_page_t *top_page, /* Page in top object, if
378 * not result_page. */
379 int *type_of_fault, /* if non-null, fill in with type of fault
380 * COW, zero-fill, etc... returned in trace point */
381 /* More arguments: */
382 kern_return_t *error_code, /* code if page is in error */
383 boolean_t no_zero_fill, /* don't zero fill absent pages */
384 boolean_t data_supply, /* treat as data_supply if
385 * it is a write fault and a full
386 * page is provided */
387 vm_map_t map,
388 __unused vm_map_offset_t vaddr)
389 {
390 register
391 vm_page_t m;
392 register
393 vm_object_t object;
394 register
395 vm_object_offset_t offset;
396 vm_page_t first_m;
397 vm_object_t next_object;
398 vm_object_t copy_object;
399 boolean_t look_for_page;
400 vm_prot_t access_required = fault_type;
401 vm_prot_t wants_copy_flag;
402 vm_object_size_t length;
403 vm_object_offset_t cluster_start, cluster_end;
404 CLUSTER_STAT(int pages_at_higher_offsets;)
405 CLUSTER_STAT(int pages_at_lower_offsets;)
406 kern_return_t wait_result;
407 boolean_t interruptible_state;
408 boolean_t bumped_pagein = FALSE;
409
410
411 #if MACH_PAGEMAP
412 /*
413 * MACH page map - an optional optimization where a bit map is maintained
414 * by the VM subsystem for internal objects to indicate which pages of
415 * the object currently reside on backing store. This existence map
416 * duplicates information maintained by the vnode pager. It is
417 * created at the time of the first pageout against the object, i.e.
 418  * at the same time the pager for the object is created. The optimization
419 * is designed to eliminate pager interaction overhead, if it is
420 * 'known' that the page does not exist on backing store.
421 *
422 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
423 * either marked as paged out in the existence map for the object or no
424 * existence map exists for the object. LOOK_FOR() is one of the
425 * criteria in the decision to invoke the pager. It is also used as one
426 * of the criteria to terminate the scan for adjacent pages in a clustered
427 * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
428 * permanent objects. Note also that if the pager for an internal object
429 * has not been created, the pager is not invoked regardless of the value
430 * of LOOK_FOR() and that clustered pagein scans are only done on an object
431 * for which a pager has been created.
432 *
433 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 434  * is marked as paged out in the existence map for the object.
 435  * PAGED_OUT() is used to determine if a page has already been pushed
436 * into a copy object in order to avoid a redundant page out operation.
437 */
438 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
439 != VM_EXTERNAL_STATE_ABSENT)
440 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
441 == VM_EXTERNAL_STATE_EXISTS)
442 #else /* MACH_PAGEMAP */
443 /*
444 * If the MACH page map optimization is not enabled,
445 * LOOK_FOR() always evaluates to TRUE. The pager will always be
446 * invoked to resolve missing pages in an object, assuming the pager
447 * has been created for the object. In a clustered page operation, the
 448  * absence of a page on backing store cannot be used to terminate
449 * a scan for adjacent pages since that information is available only in
450 * the pager. Hence pages that may not be paged out are potentially
451 * included in a clustered request. The vnode pager is coded to deal
452 * with any combination of absent/present pages in a clustered
453 * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
454 * will always be invoked to push a dirty page into a copy object assuming
455 * a pager has been created. If the page has already been pushed, the
 456  * pager will ignore the new request.
457 */
458 #define LOOK_FOR(o, f) TRUE
459 #define PAGED_OUT(o, f) FALSE
460 #endif /* MACH_PAGEMAP */
461
462 /*
463 * Recovery actions
464 */
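/*
 * RELEASE_PAGE(m) wakes any threads sleeping on the busy page and,
 * if the page is not already on the active or inactive queue, puts
 * it back on the active queue before dropping the page queues lock.
 */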
465 #define PREPARE_RELEASE_PAGE(m) \
466 MACRO_BEGIN \
467 vm_page_lock_queues(); \
468 MACRO_END
469
470 #define DO_RELEASE_PAGE(m) \
471 MACRO_BEGIN \
472 PAGE_WAKEUP_DONE(m); \
473 if (!m->active && !m->inactive) \
474 vm_page_activate(m); \
475 vm_page_unlock_queues(); \
476 MACRO_END
477
478 #define RELEASE_PAGE(m) \
479 MACRO_BEGIN \
480 PREPARE_RELEASE_PAGE(m); \
481 DO_RELEASE_PAGE(m); \
482 MACRO_END
483
484 #if TRACEFAULTPAGE
485 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
486 #endif
487
488
489
490 #if !VM_FAULT_STATIC_CONFIG
491 if (vm_fault_dirty_handling
492 #if MACH_KDB
493 /*
494 * If there are watchpoints set, then
495 * we don't want to give away write permission
496 * on a read fault. Make the task write fault,
497 * so that the watchpoint code notices the access.
498 */
499 || db_watchpoint_list
500 #endif /* MACH_KDB */
501 ) {
502 /*
503 * If we aren't asking for write permission,
504 * then don't give it away. We're using write
505 * faults to set the dirty bit.
506 */
507 if (!(fault_type & VM_PROT_WRITE))
508 *protection &= ~VM_PROT_WRITE;
509 }
510
511 if (!vm_fault_interruptible)
512 interruptible = THREAD_UNINT;
513 #else /* STATIC_CONFIG */
514 #if MACH_KDB
515 /*
516 * If there are watchpoints set, then
517 * we don't want to give away write permission
518 * on a read fault. Make the task write fault,
519 * so that the watchpoint code notices the access.
520 */
521 if (db_watchpoint_list) {
522 /*
523 * If we aren't asking for write permission,
524 * then don't give it away. We're using write
525 * faults to set the dirty bit.
526 */
527 if (!(fault_type & VM_PROT_WRITE))
528 *protection &= ~VM_PROT_WRITE;
529 }
530
531 #endif /* MACH_KDB */
532 #endif /* STATIC_CONFIG */
533
534 interruptible_state = thread_interrupt_level(interruptible);
535
536 /*
537 * INVARIANTS (through entire routine):
538 *
539 * 1) At all times, we must either have the object
540 * lock or a busy page in some object to prevent
541 * some other thread from trying to bring in
542 * the same page.
543 *
544 * Note that we cannot hold any locks during the
545 * pager access or when waiting for memory, so
546 * we use a busy page then.
547 *
548 * Note also that we aren't as concerned about more than
549 * one thread attempting to memory_object_data_unlock
550 * the same page at once, so we don't hold the page
551 * as busy then, but do record the highest unlock
552 * value so far. [Unlock requests may also be delivered
553 * out of order.]
554 *
555 * 2) To prevent another thread from racing us down the
556 * shadow chain and entering a new page in the top
557 * object before we do, we must keep a busy page in
558 * the top object while following the shadow chain.
559 *
560 * 3) We must increment paging_in_progress on any object
561 * for which we have a busy page
562 *
563 * 4) We leave busy pages on the pageout queues.
564 * If the pageout daemon comes across a busy page,
565 * it will remove the page from the pageout queues.
566 */
567
568 /*
569 * Search for the page at object/offset.
570 */
571
572 object = first_object;
573 offset = first_offset;
574 first_m = VM_PAGE_NULL;
575 access_required = fault_type;
576
577 XPR(XPR_VM_FAULT,
578 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
579 (integer_t)object, offset, fault_type, *protection, 0);
580
581 /*
582 * See whether this page is resident
583 */
584
585 while (TRUE) {
586 #if TRACEFAULTPAGE
587 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
588 #endif
589 if (!object->alive) {
590 vm_fault_cleanup(object, first_m);
591 thread_interrupt_level(interruptible_state);
592 return(VM_FAULT_MEMORY_ERROR);
593 }
594 m = vm_page_lookup(object, offset);
595 #if TRACEFAULTPAGE
596 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
597 #endif
598 if (m != VM_PAGE_NULL) {
599 /*
600 * If the page was pre-paged as part of a
601 * cluster, record the fact.
602 * If we were passed a valid pointer for
 603  *                       * "type_of_fault", then we came from
604 * vm_fault... we'll let it deal with
605 * this condition, since it
606 * needs to see m->clustered to correctly
607 * account the pageins.
608 */
609 if (type_of_fault == NULL && m->clustered) {
610 vm_pagein_cluster_used++;
611 m->clustered = FALSE;
612 }
613
614 /*
615 * If the page is being brought in,
616 * wait for it and then retry.
617 *
618 * A possible optimization: if the page
619 * is known to be resident, we can ignore
620 * pages that are absent (regardless of
621 * whether they're busy).
622 */
623
624 if (m->busy) {
625 #if TRACEFAULTPAGE
626 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
627 #endif
628 wait_result = PAGE_SLEEP(object, m, interruptible);
629 XPR(XPR_VM_FAULT,
630 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
631 (integer_t)object, offset,
632 (integer_t)m, 0, 0);
633 counter(c_vm_fault_page_block_busy_kernel++);
634
635 if (wait_result != THREAD_AWAKENED) {
636 vm_fault_cleanup(object, first_m);
637 thread_interrupt_level(interruptible_state);
638 if (wait_result == THREAD_RESTART)
639 {
640 return(VM_FAULT_RETRY);
641 }
642 else
643 {
644 return(VM_FAULT_INTERRUPTED);
645 }
646 }
647 continue;
648 }
649
650 if (m->encrypted) {
651 /*
652 * ENCRYPTED SWAP:
653 * the user needs access to a page that we
654 * encrypted before paging it out.
655 * Decrypt the page now.
656 * Keep it busy to prevent anyone from
657 * accessing it during the decryption.
658 */
659 m->busy = TRUE;
660 vm_page_decrypt(m, 0);
661 assert(object == m->object);
662 assert(m->busy);
663 PAGE_WAKEUP_DONE(m);
664
665 /*
666 * Retry from the top, in case
667 * something changed while we were
668 * decrypting.
669 */
670 continue;
671 }
672 ASSERT_PAGE_DECRYPTED(m);
673
674 /*
675 * If the page is in error, give up now.
676 */
677
678 if (m->error) {
679 #if TRACEFAULTPAGE
680 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
681 #endif
682 if (error_code)
683 *error_code = m->page_error;
684 VM_PAGE_FREE(m);
685 vm_fault_cleanup(object, first_m);
686 thread_interrupt_level(interruptible_state);
687 return(VM_FAULT_MEMORY_ERROR);
688 }
689
690 /*
691 * If the pager wants us to restart
692 * at the top of the chain,
693 * typically because it has moved the
694 * page to another pager, then do so.
695 */
696
697 if (m->restart) {
698 #if TRACEFAULTPAGE
699 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
700 #endif
701 VM_PAGE_FREE(m);
702 vm_fault_cleanup(object, first_m);
703 thread_interrupt_level(interruptible_state);
704 return(VM_FAULT_RETRY);
705 }
706
707 /*
708 * If the page isn't busy, but is absent,
709 * then it was deemed "unavailable".
710 */
711
712 if (m->absent) {
713 /*
714 * Remove the non-existent page (unless it's
715 * in the top object) and move on down to the
716 * next object (if there is one).
717 */
718 #if TRACEFAULTPAGE
719 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
720 #endif
721
722 next_object = object->shadow;
723 if (next_object == VM_OBJECT_NULL) {
724 vm_page_t real_m;
725
726 assert(!must_be_resident);
727
728 if (object->shadow_severed) {
729 vm_fault_cleanup(
730 object, first_m);
731 thread_interrupt_level(interruptible_state);
732 return VM_FAULT_MEMORY_ERROR;
733 }
734
735 /*
736 * Absent page at bottom of shadow
737 * chain; zero fill the page we left
738 * busy in the first object, and flush
739 * the absent page. But first we
740 * need to allocate a real page.
741 */
742 if (VM_PAGE_THROTTLED() ||
743 (real_m = vm_page_grab())
744 == VM_PAGE_NULL) {
745 vm_fault_cleanup(
746 object, first_m);
747 thread_interrupt_level(
748 interruptible_state);
749 return(
750 VM_FAULT_MEMORY_SHORTAGE);
751 }
752
753 /*
 754  *                               * Are we protecting the system from
 755  *                               * backing store exhaustion?  If so,
 756  *                               * sleep unless we are privileged.
757 */
758
759 if(vm_backing_store_low) {
760 if(!(current_task()->priv_flags
761 & VM_BACKING_STORE_PRIV)) {
762 assert_wait((event_t)
763 &vm_backing_store_low,
764 THREAD_UNINT);
765 vm_fault_cleanup(object,
766 first_m);
767 thread_block(THREAD_CONTINUE_NULL);
768 thread_interrupt_level(
769 interruptible_state);
770 return(VM_FAULT_RETRY);
771 }
772 }
773
774
775 XPR(XPR_VM_FAULT,
776 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
777 (integer_t)object, offset,
778 (integer_t)m,
779 (integer_t)first_object, 0);
780 if (object != first_object) {
781 VM_PAGE_FREE(m);
782 vm_object_paging_end(object);
783 vm_object_unlock(object);
784 object = first_object;
785 offset = first_offset;
786 m = first_m;
787 first_m = VM_PAGE_NULL;
788 vm_object_lock(object);
789 }
790
791 VM_PAGE_FREE(m);
792 assert(real_m->busy);
793 vm_page_insert(real_m, object, offset);
794 m = real_m;
795
796 /*
797 * Drop the lock while zero filling
798 * page. Then break because this
799 * is the page we wanted. Checking
800 * the page lock is a waste of time;
801 * this page was either absent or
802 * newly allocated -- in both cases
803 * it can't be page locked by a pager.
804 */
805 m->no_isync = FALSE;
806
807 if (!no_zero_fill) {
808 vm_object_unlock(object);
809 vm_page_zero_fill(m);
810 vm_object_lock(object);
811
812 if (type_of_fault)
813 *type_of_fault = DBG_ZERO_FILL_FAULT;
814 VM_STAT(zero_fill_count++);
815 }
816 if (bumped_pagein == TRUE) {
817 VM_STAT(pageins--);
818 current_task()->pageins--;
819 }
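/*
 * Place the freshly zero-filled page on the zero-fill queue
 * (for objects larger than 2MB) or on the inactive queue, and
 * advance the page ticket used to age these pages.
 */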
820 vm_page_lock_queues();
821 VM_PAGE_QUEUES_REMOVE(m);
822 m->page_ticket = vm_page_ticket;
823 assert(!m->laundry);
824 assert(m->object != kernel_object);
825 assert(m->pageq.next == NULL &&
826 m->pageq.prev == NULL);
827 if(m->object->size > 0x200000) {
828 m->zero_fill = TRUE;
829 /* depends on the queues lock */
830 vm_zf_count += 1;
831 queue_enter(&vm_page_queue_zf,
832 m, vm_page_t, pageq);
833 } else {
834 queue_enter(
835 &vm_page_queue_inactive,
836 m, vm_page_t, pageq);
837 }
838 vm_page_ticket_roll++;
839 if(vm_page_ticket_roll ==
840 VM_PAGE_TICKETS_IN_ROLL) {
841 vm_page_ticket_roll = 0;
842 if(vm_page_ticket ==
843 VM_PAGE_TICKET_ROLL_IDS)
844 vm_page_ticket= 0;
845 else
846 vm_page_ticket++;
847 }
848 m->inactive = TRUE;
849 vm_page_inactive_count++;
850 vm_page_unlock_queues();
851 break;
852 } else {
853 if (must_be_resident) {
854 vm_object_paging_end(object);
855 } else if (object != first_object) {
856 vm_object_paging_end(object);
857 VM_PAGE_FREE(m);
858 } else {
859 first_m = m;
860 m->absent = FALSE;
861 m->unusual = FALSE;
862 vm_object_absent_release(object);
863 m->busy = TRUE;
864
865 vm_page_lock_queues();
866 VM_PAGE_QUEUES_REMOVE(m);
867 vm_page_unlock_queues();
868 }
869 XPR(XPR_VM_FAULT,
870 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
871 (integer_t)object, offset,
872 (integer_t)next_object,
873 offset+object->shadow_offset,0);
874 offset += object->shadow_offset;
875 hi_offset += object->shadow_offset;
876 lo_offset += object->shadow_offset;
877 access_required = VM_PROT_READ;
878 vm_object_lock(next_object);
879 vm_object_unlock(object);
880 object = next_object;
881 vm_object_paging_begin(object);
882 continue;
883 }
884 }
885
886 if ((m->cleaning)
887 && ((object != first_object) ||
888 (object->copy != VM_OBJECT_NULL))
889 && (fault_type & VM_PROT_WRITE)) {
890 /*
891 * This is a copy-on-write fault that will
892 * cause us to revoke access to this page, but
893 * this page is in the process of being cleaned
894 * in a clustered pageout. We must wait until
895 * the cleaning operation completes before
896 * revoking access to the original page,
897 * otherwise we might attempt to remove a
898 * wired mapping.
899 */
900 #if TRACEFAULTPAGE
901 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
902 #endif
903 XPR(XPR_VM_FAULT,
904 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
905 (integer_t)object, offset,
906 (integer_t)m, 0, 0);
907 /* take an extra ref so that object won't die */
908 assert(object->ref_count > 0);
909 object->ref_count++;
910 vm_object_res_reference(object);
911 vm_fault_cleanup(object, first_m);
912 counter(c_vm_fault_page_block_backoff_kernel++);
913 vm_object_lock(object);
914 assert(object->ref_count > 0);
915 m = vm_page_lookup(object, offset);
916 if (m != VM_PAGE_NULL && m->cleaning) {
917 PAGE_ASSERT_WAIT(m, interruptible);
918 vm_object_unlock(object);
919 wait_result = thread_block(THREAD_CONTINUE_NULL);
920 vm_object_deallocate(object);
921 goto backoff;
922 } else {
923 vm_object_unlock(object);
924 vm_object_deallocate(object);
925 thread_interrupt_level(interruptible_state);
926 return VM_FAULT_RETRY;
927 }
928 }
929
930 /*
931 * If the desired access to this page has
932 * been locked out, request that it be unlocked.
933 */
934
935 if (access_required & m->page_lock) {
936 if ((access_required & m->unlock_request) != access_required) {
937 vm_prot_t new_unlock_request;
938 kern_return_t rc;
939
940 #if TRACEFAULTPAGE
941 dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
942 #endif
943 if (!object->pager_ready) {
944 XPR(XPR_VM_FAULT,
945 "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
946 access_required,
947 (integer_t)object, offset,
948 (integer_t)m, 0);
949 /* take an extra ref */
950 assert(object->ref_count > 0);
951 object->ref_count++;
952 vm_object_res_reference(object);
953 vm_fault_cleanup(object,
954 first_m);
955 counter(c_vm_fault_page_block_backoff_kernel++);
956 vm_object_lock(object);
957 assert(object->ref_count > 0);
958 if (!object->pager_ready) {
959 wait_result = vm_object_assert_wait(
960 object,
961 VM_OBJECT_EVENT_PAGER_READY,
962 interruptible);
963 vm_object_unlock(object);
964 if (wait_result == THREAD_WAITING)
965 wait_result = thread_block(THREAD_CONTINUE_NULL);
966 vm_object_deallocate(object);
967 goto backoff;
968 } else {
969 vm_object_unlock(object);
970 vm_object_deallocate(object);
971 thread_interrupt_level(interruptible_state);
972 return VM_FAULT_RETRY;
973 }
974 }
975
976 new_unlock_request = m->unlock_request =
977 (access_required | m->unlock_request);
978 vm_object_unlock(object);
979 XPR(XPR_VM_FAULT,
980 "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
981 (integer_t)object, offset,
982 (integer_t)m, new_unlock_request, 0);
983 if ((rc = memory_object_data_unlock(
984 object->pager,
985 offset + object->paging_offset,
986 PAGE_SIZE,
987 new_unlock_request))
988 != KERN_SUCCESS) {
989 if (vm_fault_debug)
990 printf("vm_fault: memory_object_data_unlock failed\n");
991 vm_object_lock(object);
992 vm_fault_cleanup(object, first_m);
993 thread_interrupt_level(interruptible_state);
994 return((rc == MACH_SEND_INTERRUPTED) ?
995 VM_FAULT_INTERRUPTED :
996 VM_FAULT_MEMORY_ERROR);
997 }
998 vm_object_lock(object);
999 continue;
1000 }
1001
1002 XPR(XPR_VM_FAULT,
1003 "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
1004 access_required, (integer_t)object,
1005 offset, (integer_t)m, 0);
1006 /* take an extra ref so object won't die */
1007 assert(object->ref_count > 0);
1008 object->ref_count++;
1009 vm_object_res_reference(object);
1010 vm_fault_cleanup(object, first_m);
1011 counter(c_vm_fault_page_block_backoff_kernel++);
1012 vm_object_lock(object);
1013 assert(object->ref_count > 0);
1014 m = vm_page_lookup(object, offset);
1015 if (m != VM_PAGE_NULL &&
1016 (access_required & m->page_lock) &&
1017 !((access_required & m->unlock_request) != access_required)) {
1018 PAGE_ASSERT_WAIT(m, interruptible);
1019 vm_object_unlock(object);
1020 wait_result = thread_block(THREAD_CONTINUE_NULL);
1021 vm_object_deallocate(object);
1022 goto backoff;
1023 } else {
1024 vm_object_unlock(object);
1025 vm_object_deallocate(object);
1026 thread_interrupt_level(interruptible_state);
1027 return VM_FAULT_RETRY;
1028 }
1029 }
1030 /*
1031 * We mark the page busy and leave it on
1032 * the pageout queues. If the pageout
 1033  *               * daemon comes across it, then it will
1034 * remove the page.
1035 */
1036
1037 #if TRACEFAULTPAGE
1038 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1039 #endif
1040
1041 #if !VM_FAULT_STATIC_CONFIG
1042 if (!software_reference_bits) {
1043 vm_page_lock_queues();
1044 if (m->inactive)
1045 vm_stat.reactivations++;
1046
1047 VM_PAGE_QUEUES_REMOVE(m);
1048 vm_page_unlock_queues();
1049 }
1050 #endif
1051 XPR(XPR_VM_FAULT,
1052 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1053 (integer_t)object, offset, (integer_t)m, 0, 0);
1054 assert(!m->busy);
1055 m->busy = TRUE;
1056 assert(!m->absent);
1057 break;
1058 }
1059
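/*
 * The page is not resident in this object.  Decide whether to ask
 * the pager for it: only if a pager has been created, the existence
 * map (if any) does not say the page is absent from backing store,
 * and the caller is not supplying the data itself.
 */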
1060 look_for_page =
1061 (object->pager_created) &&
1062 LOOK_FOR(object, offset) &&
1063 (!data_supply);
1064
1065 #if TRACEFAULTPAGE
1066 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
1067 #endif
1068 if ((look_for_page || (object == first_object))
1069 && !must_be_resident
1070 && !(object->phys_contiguous)) {
1071 /*
1072 * Allocate a new page for this object/offset
1073 * pair.
1074 */
1075
1076 m = vm_page_grab_fictitious();
1077 #if TRACEFAULTPAGE
1078 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
1079 #endif
1080 if (m == VM_PAGE_NULL) {
1081 vm_fault_cleanup(object, first_m);
1082 thread_interrupt_level(interruptible_state);
1083 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1084 }
1085 vm_page_insert(m, object, offset);
1086 }
1087
1088 if ((look_for_page && !must_be_resident)) {
1089 kern_return_t rc;
1090
1091 /*
1092 * If the memory manager is not ready, we
1093 * cannot make requests.
1094 */
1095 if (!object->pager_ready) {
1096 #if TRACEFAULTPAGE
1097 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
1098 #endif
1099 if(m != VM_PAGE_NULL)
1100 VM_PAGE_FREE(m);
1101 XPR(XPR_VM_FAULT,
1102 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1103 (integer_t)object, offset, 0, 0, 0);
1104 /* take an extra ref so object won't die */
1105 assert(object->ref_count > 0);
1106 object->ref_count++;
1107 vm_object_res_reference(object);
1108 vm_fault_cleanup(object, first_m);
1109 counter(c_vm_fault_page_block_backoff_kernel++);
1110 vm_object_lock(object);
1111 assert(object->ref_count > 0);
1112 if (!object->pager_ready) {
1113 wait_result = vm_object_assert_wait(object,
1114 VM_OBJECT_EVENT_PAGER_READY,
1115 interruptible);
1116 vm_object_unlock(object);
1117 if (wait_result == THREAD_WAITING)
1118 wait_result = thread_block(THREAD_CONTINUE_NULL);
1119 vm_object_deallocate(object);
1120 goto backoff;
1121 } else {
1122 vm_object_unlock(object);
1123 vm_object_deallocate(object);
1124 thread_interrupt_level(interruptible_state);
1125 return VM_FAULT_RETRY;
1126 }
1127 }
1128
1129 if(object->phys_contiguous) {
1130 if(m != VM_PAGE_NULL) {
1131 VM_PAGE_FREE(m);
1132 m = VM_PAGE_NULL;
1133 }
1134 goto no_clustering;
1135 }
1136 if (object->internal) {
1137 /*
1138 * Requests to the default pager
1139 * must reserve a real page in advance,
 1140  *                        * because the pager's data-provided
 1141  *                        * path won't block for pages. IMPORTANT:
1142 * this acts as a throttling mechanism
1143 * for data_requests to the default
1144 * pager.
1145 */
1146
1147 #if TRACEFAULTPAGE
1148 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1149 #endif
1150 if (m->fictitious && !vm_page_convert(m)) {
1151 VM_PAGE_FREE(m);
1152 vm_fault_cleanup(object, first_m);
1153 thread_interrupt_level(interruptible_state);
1154 return(VM_FAULT_MEMORY_SHORTAGE);
1155 }
1156 } else if (object->absent_count >
1157 vm_object_absent_max) {
1158 /*
1159 * If there are too many outstanding page
1160 * requests pending on this object, we
1161 * wait for them to be resolved now.
1162 */
1163
1164 #if TRACEFAULTPAGE
1165 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1166 #endif
1167 if(m != VM_PAGE_NULL)
1168 VM_PAGE_FREE(m);
1169 /* take an extra ref so object won't die */
1170 assert(object->ref_count > 0);
1171 object->ref_count++;
1172 vm_object_res_reference(object);
1173 vm_fault_cleanup(object, first_m);
1174 counter(c_vm_fault_page_block_backoff_kernel++);
1175 vm_object_lock(object);
1176 assert(object->ref_count > 0);
1177 if (object->absent_count > vm_object_absent_max) {
1178 vm_object_absent_assert_wait(object,
1179 interruptible);
1180 vm_object_unlock(object);
1181 wait_result = thread_block(THREAD_CONTINUE_NULL);
1182 vm_object_deallocate(object);
1183 goto backoff;
1184 } else {
1185 vm_object_unlock(object);
1186 vm_object_deallocate(object);
1187 thread_interrupt_level(interruptible_state);
1188 return VM_FAULT_RETRY;
1189 }
1190 }
1191
1192 /*
1193 * Indicate that the page is waiting for data
1194 * from the memory manager.
1195 */
1196
1197 if(m != VM_PAGE_NULL) {
1198
1199 m->list_req_pending = TRUE;
1200 m->absent = TRUE;
1201 m->unusual = TRUE;
1202 object->absent_count++;
1203
1204 }
1205
1206 no_clustering:
1207 cluster_start = offset;
1208 length = PAGE_SIZE;
1209
1210 /*
1211 * lengthen the cluster by the pages in the working set
1212 */
1213 if((map != NULL) &&
1214 (current_task()->dynamic_working_set != 0)) {
1215 cluster_end = cluster_start + length;
 1216                          /* tws values for start and end are just
 1217                           * suggestions. Therefore, as long as
1218 * build_cluster does not use pointers or
1219 * take action based on values that
1220 * could be affected by re-entrance we
1221 * do not need to take the map lock.
1222 */
1223 cluster_end = offset + PAGE_SIZE_64;
1224 tws_build_cluster(
1225 current_task()->dynamic_working_set,
1226 object, &cluster_start,
1227 &cluster_end, 0x40000);
1228 length = cluster_end - cluster_start;
1229 }
1230 #if TRACEFAULTPAGE
1231 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1232 #endif
1233 /*
1234 * We have a busy page, so we can
1235 * release the object lock.
1236 */
1237 vm_object_unlock(object);
1238
1239 /*
1240 * Call the memory manager to retrieve the data.
1241 */
1242
1243 if (type_of_fault)
1244 *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT;
1245 VM_STAT(pageins++);
1246 current_task()->pageins++;
1247 bumped_pagein = TRUE;
1248
1249 /*
1250 * If this object uses a copy_call strategy,
1251 * and we are interested in a copy of this object
1252 * (having gotten here only by following a
1253 * shadow chain), then tell the memory manager
1254 * via a flag added to the desired_access
1255 * parameter, so that it can detect a race
1256 * between our walking down the shadow chain
1257 * and its pushing pages up into a copy of
1258 * the object that it manages.
1259 */
1260
1261 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1262 object != first_object) {
1263 wants_copy_flag = VM_PROT_WANTS_COPY;
1264 } else {
1265 wants_copy_flag = VM_PROT_NONE;
1266 }
1267
1268 XPR(XPR_VM_FAULT,
1269 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1270 (integer_t)object, offset, (integer_t)m,
1271 access_required | wants_copy_flag, 0);
1272
1273 rc = memory_object_data_request(object->pager,
1274 cluster_start + object->paging_offset,
1275 length,
1276 access_required | wants_copy_flag);
1277
1278
1279 #if TRACEFAULTPAGE
1280 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1281 #endif
1282 if (rc != KERN_SUCCESS) {
1283 if (rc != MACH_SEND_INTERRUPTED
1284 && vm_fault_debug)
 1285                                          printf("%s(0x%x, 0x%llx, 0x%llx, 0x%x) failed, rc=%d\n",
1286 "memory_object_data_request",
1287 object->pager,
1288 cluster_start + object->paging_offset,
1289 length, access_required, rc);
1290 /*
1291 * Don't want to leave a busy page around,
1292 * but the data request may have blocked,
1293 * so check if it's still there and busy.
1294 */
1295 if(!object->phys_contiguous) {
1296 vm_object_lock(object);
1297 for (; length; length -= PAGE_SIZE,
1298 cluster_start += PAGE_SIZE_64) {
1299 vm_page_t p;
1300 if ((p = vm_page_lookup(object,
1301 cluster_start))
1302 && p->absent && p->busy
1303 && p != first_m) {
1304 VM_PAGE_FREE(p);
1305 }
1306 }
1307 }
1308 vm_fault_cleanup(object, first_m);
1309 thread_interrupt_level(interruptible_state);
1310 return((rc == MACH_SEND_INTERRUPTED) ?
1311 VM_FAULT_INTERRUPTED :
1312 VM_FAULT_MEMORY_ERROR);
1313 }
1314
1315 vm_object_lock(object);
1316 if ((interruptible != THREAD_UNINT) &&
1317 (current_thread()->state & TH_ABORT)) {
1318 vm_fault_cleanup(object, first_m);
1319 thread_interrupt_level(interruptible_state);
1320 return(VM_FAULT_INTERRUPTED);
1321 }
1322 if (m == VM_PAGE_NULL &&
1323 object->phys_contiguous) {
1324 /*
1325 * No page here means that the object we
1326 * initially looked up was "physically
1327 * contiguous" (i.e. device memory). However,
1328 * with Virtual VRAM, the object might not
1329 * be backed by that device memory anymore,
1330 * so we're done here only if the object is
1331 * still "phys_contiguous".
1332 * Otherwise, if the object is no longer
1333 * "phys_contiguous", we need to retry the
1334 * page fault against the object's new backing
1335 * store (different memory object).
1336 */
1337 break;
1338 }
1339
1340 /*
1341 * Retry with same object/offset, since new data may
1342 * be in a different page (i.e., m is meaningless at
1343 * this point).
1344 */
1345 continue;
1346 }
1347
1348 /*
1349 * The only case in which we get here is if
 1350  * object has no pager (or we are unwiring). If the pager doesn't
1351 * have the page this is handled in the m->absent case above
1352 * (and if you change things here you should look above).
1353 */
1354 #if TRACEFAULTPAGE
1355 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1356 #endif
1357 if (object == first_object)
1358 first_m = m;
1359 else
1360 assert(m == VM_PAGE_NULL);
1361
1362 XPR(XPR_VM_FAULT,
1363 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1364 (integer_t)object, offset, (integer_t)m,
1365 (integer_t)object->shadow, 0);
1366 /*
1367 * Move on to the next object. Lock the next
1368 * object before unlocking the current one.
1369 */
1370 next_object = object->shadow;
1371 if (next_object == VM_OBJECT_NULL) {
1372 assert(!must_be_resident);
1373 /*
1374 * If there's no object left, fill the page
1375 * in the top object with zeros. But first we
1376 * need to allocate a real page.
1377 */
1378
1379 if (object != first_object) {
1380 vm_object_paging_end(object);
1381 vm_object_unlock(object);
1382
1383 object = first_object;
1384 offset = first_offset;
1385 vm_object_lock(object);
1386 }
1387
1388 m = first_m;
1389 assert(m->object == object);
1390 first_m = VM_PAGE_NULL;
1391
1392 if(m == VM_PAGE_NULL) {
1393 m = vm_page_grab();
1394 if (m == VM_PAGE_NULL) {
1395 vm_fault_cleanup(
1396 object, VM_PAGE_NULL);
1397 thread_interrupt_level(
1398 interruptible_state);
1399 return(VM_FAULT_MEMORY_SHORTAGE);
1400 }
1401 vm_page_insert(
1402 m, object, offset);
1403 }
1404
1405 if (object->shadow_severed) {
1406 VM_PAGE_FREE(m);
1407 vm_fault_cleanup(object, VM_PAGE_NULL);
1408 thread_interrupt_level(interruptible_state);
1409 return VM_FAULT_MEMORY_ERROR;
1410 }
1411
1412 /*
 1413  *                       * Are we protecting the system from
 1414  *                       * backing store exhaustion?  If so,
 1415  *                       * sleep unless we are privileged.
1416 */
1417
1418 if(vm_backing_store_low) {
1419 if(!(current_task()->priv_flags
1420 & VM_BACKING_STORE_PRIV)) {
1421 assert_wait((event_t)
1422 &vm_backing_store_low,
1423 THREAD_UNINT);
1424 VM_PAGE_FREE(m);
1425 vm_fault_cleanup(object, VM_PAGE_NULL);
1426 thread_block(THREAD_CONTINUE_NULL);
1427 thread_interrupt_level(
1428 interruptible_state);
1429 return(VM_FAULT_RETRY);
1430 }
1431 }
1432
1433 if (VM_PAGE_THROTTLED() ||
1434 (m->fictitious && !vm_page_convert(m))) {
1435 VM_PAGE_FREE(m);
1436 vm_fault_cleanup(object, VM_PAGE_NULL);
1437 thread_interrupt_level(interruptible_state);
1438 return(VM_FAULT_MEMORY_SHORTAGE);
1439 }
1440 m->no_isync = FALSE;
1441
1442 if (!no_zero_fill) {
1443 vm_object_unlock(object);
1444 vm_page_zero_fill(m);
1445 vm_object_lock(object);
1446
1447 if (type_of_fault)
1448 *type_of_fault = DBG_ZERO_FILL_FAULT;
1449 VM_STAT(zero_fill_count++);
1450 }
1451 if (bumped_pagein == TRUE) {
1452 VM_STAT(pageins--);
1453 current_task()->pageins--;
1454 }
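/*
 * As in the absent-page case above: queue the zero-filled page on
 * the zero-fill or inactive queue and advance the page ticket.
 */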
1455 vm_page_lock_queues();
1456 VM_PAGE_QUEUES_REMOVE(m);
1457 assert(!m->laundry);
1458 assert(m->object != kernel_object);
1459 assert(m->pageq.next == NULL &&
1460 m->pageq.prev == NULL);
1461 if(m->object->size > 0x200000) {
1462 m->zero_fill = TRUE;
1463 /* depends on the queues lock */
1464 vm_zf_count += 1;
1465 queue_enter(&vm_page_queue_zf,
1466 m, vm_page_t, pageq);
1467 } else {
1468 queue_enter(
1469 &vm_page_queue_inactive,
1470 m, vm_page_t, pageq);
1471 }
1472 m->page_ticket = vm_page_ticket;
1473 vm_page_ticket_roll++;
1474 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1475 vm_page_ticket_roll = 0;
1476 if(vm_page_ticket ==
1477 VM_PAGE_TICKET_ROLL_IDS)
1478 vm_page_ticket= 0;
1479 else
1480 vm_page_ticket++;
1481 }
1482 m->inactive = TRUE;
1483 vm_page_inactive_count++;
1484 vm_page_unlock_queues();
1485 #if 0
1486 pmap_clear_modify(m->phys_page);
1487 #endif
1488 break;
1489 }
1490 else {
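/*
 * There is a shadow object to search.  End this object's paging
 * operation where appropriate, shift the offsets by shadow_offset,
 * downgrade the required access to read, take a paging reference
 * on the shadow, and continue the lookup there.
 */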
1491 if ((object != first_object) || must_be_resident)
1492 vm_object_paging_end(object);
1493 offset += object->shadow_offset;
1494 hi_offset += object->shadow_offset;
1495 lo_offset += object->shadow_offset;
1496 access_required = VM_PROT_READ;
1497 vm_object_lock(next_object);
1498 vm_object_unlock(object);
1499 object = next_object;
1500 vm_object_paging_begin(object);
1501 }
1502 }
1503
1504 /*
1505 * PAGE HAS BEEN FOUND.
1506 *
1507 * This page (m) is:
1508 * busy, so that we can play with it;
1509 * not absent, so that nobody else will fill it;
1510 * possibly eligible for pageout;
1511 *
1512 * The top-level page (first_m) is:
1513 * VM_PAGE_NULL if the page was found in the
1514 * top-level object;
1515 * busy, not absent, and ineligible for pageout.
1516 *
1517 * The current object (object) is locked. A paging
1518 * reference is held for the current and top-level
1519 * objects.
1520 */
1521
1522 #if TRACEFAULTPAGE
1523 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1524 #endif
1525 #if EXTRA_ASSERTIONS
1526 if(m != VM_PAGE_NULL) {
1527 assert(m->busy && !m->absent);
1528 assert((first_m == VM_PAGE_NULL) ||
1529 (first_m->busy && !first_m->absent &&
1530 !first_m->active && !first_m->inactive));
1531 }
1532 #endif /* EXTRA_ASSERTIONS */
1533
1534 /*
1535 * ENCRYPTED SWAP:
1536 * If we found a page, we must have decrypted it before we
1537 * get here...
1538 */
1539 if (m != VM_PAGE_NULL) {
1540 ASSERT_PAGE_DECRYPTED(m);
1541 }
1542
1543 XPR(XPR_VM_FAULT,
1544 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1545 (integer_t)object, offset, (integer_t)m,
1546 (integer_t)first_object, (integer_t)first_m);
1547 /*
1548 * If the page is being written, but isn't
1549 * already owned by the top-level object,
1550 * we have to copy it into a new page owned
1551 * by the top-level object.
1552 */
1553
1554 if ((object != first_object) && (m != VM_PAGE_NULL)) {
1555 /*
1556 * We only really need to copy if we
1557 * want to write it.
1558 */
1559
1560 #if TRACEFAULTPAGE
1561 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1562 #endif
1563 if (fault_type & VM_PROT_WRITE) {
1564 vm_page_t copy_m;
1565
1566 assert(!must_be_resident);
1567
1568 /*
 1569  *                       * Are we protecting the system from
 1570  *                       * backing store exhaustion?  If so,
 1571  *                       * sleep unless we are privileged.
1572 */
1573
1574 if(vm_backing_store_low) {
1575 if(!(current_task()->priv_flags
1576 & VM_BACKING_STORE_PRIV)) {
1577 assert_wait((event_t)
1578 &vm_backing_store_low,
1579 THREAD_UNINT);
1580 RELEASE_PAGE(m);
1581 vm_fault_cleanup(object, first_m);
1582 thread_block(THREAD_CONTINUE_NULL);
1583 thread_interrupt_level(
1584 interruptible_state);
1585 return(VM_FAULT_RETRY);
1586 }
1587 }
1588
1589 /*
1590 * If we try to collapse first_object at this
1591 * point, we may deadlock when we try to get
1592 * the lock on an intermediate object (since we
1593 * have the bottom object locked). We can't
1594 * unlock the bottom object, because the page
1595 * we found may move (by collapse) if we do.
1596 *
1597 * Instead, we first copy the page. Then, when
1598 * we have no more use for the bottom object,
1599 * we unlock it and try to collapse.
1600 *
1601 * Note that we copy the page even if we didn't
1602 * need to... that's the breaks.
1603 */
1604
1605 /*
1606 * Allocate a page for the copy
1607 */
1608 copy_m = vm_page_grab();
1609 if (copy_m == VM_PAGE_NULL) {
1610 RELEASE_PAGE(m);
1611 vm_fault_cleanup(object, first_m);
1612 thread_interrupt_level(interruptible_state);
1613 return(VM_FAULT_MEMORY_SHORTAGE);
1614 }
1615
1616
1617 XPR(XPR_VM_FAULT,
1618 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1619 (integer_t)object, offset,
1620 (integer_t)m, (integer_t)copy_m, 0);
1621 vm_page_copy(m, copy_m);
1622
1623 /*
1624 * If another map is truly sharing this
1625 * page with us, we have to flush all
1626 * uses of the original page, since we
1627 * can't distinguish those which want the
1628 * original from those which need the
1629 * new copy.
1630 *
1631 * XXXO If we know that only one map has
1632 * access to this page, then we could
1633 * avoid the pmap_disconnect() call.
1634 */
1635
1636 vm_page_lock_queues();
1637 assert(!m->cleaning);
1638 pmap_disconnect(m->phys_page);
1639 vm_page_deactivate(m);
1640 copy_m->dirty = TRUE;
1641 /*
1642 * Setting reference here prevents this fault from
1643 * being counted as a (per-thread) reactivate as well
1644 * as a copy-on-write.
1645 */
1646 first_m->reference = TRUE;
1647 vm_page_unlock_queues();
1648
1649 /*
1650 * We no longer need the old page or object.
1651 */
1652
1653 PAGE_WAKEUP_DONE(m);
1654 vm_object_paging_end(object);
1655 vm_object_unlock(object);
1656
1657 if (type_of_fault)
1658 *type_of_fault = DBG_COW_FAULT;
1659 VM_STAT(cow_faults++);
1660 current_task()->cow_faults++;
1661 object = first_object;
1662 offset = first_offset;
1663
1664 vm_object_lock(object);
1665 VM_PAGE_FREE(first_m);
1666 first_m = VM_PAGE_NULL;
1667 assert(copy_m->busy);
1668 vm_page_insert(copy_m, object, offset);
1669 m = copy_m;
1670
1671 /*
1672 * Now that we've gotten the copy out of the
1673 * way, let's try to collapse the top object.
1674 * But we have to play ugly games with
1675 * paging_in_progress to do that...
1676 */
1677
1678 vm_object_paging_end(object);
1679 vm_object_collapse(object, offset);
1680 vm_object_paging_begin(object);
1681
1682 }
1683 else {
1684 *protection &= (~VM_PROT_WRITE);
1685 }
1686 }
1687
1688 /*
1689 * Now check whether the page needs to be pushed into the
1690 * copy object. The use of asymmetric copy on write for
1691 * shared temporary objects means that we may do two copies to
1692 * satisfy the fault; one above to get the page from a
1693 * shadowed object, and one here to push it into the copy.
1694 */
1695
1696 while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1697 (m!= VM_PAGE_NULL)) {
1698 vm_object_offset_t copy_offset;
1699 vm_page_t copy_m;
1700
1701 #if TRACEFAULTPAGE
1702 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1703 #endif
1704 /*
1705 * If the page is being written, but hasn't been
1706 * copied to the copy-object, we have to copy it there.
1707 */
1708
1709 if ((fault_type & VM_PROT_WRITE) == 0) {
1710 *protection &= ~VM_PROT_WRITE;
1711 break;
1712 }
1713
1714 /*
1715 * If the page was guaranteed to be resident,
1716 * we must have already performed the copy.
1717 */
1718
1719 if (must_be_resident)
1720 break;
1721
1722 /*
1723 * Try to get the lock on the copy_object.
1724 */
1725 if (!vm_object_lock_try(copy_object)) {
1726 vm_object_unlock(object);
1727
1728 mutex_pause(); /* wait a bit */
1729
1730 vm_object_lock(object);
1731 continue;
1732 }
1733
1734 /*
1735 * Make another reference to the copy-object,
1736 * to keep it from disappearing during the
1737 * copy.
1738 */
1739 assert(copy_object->ref_count > 0);
1740 copy_object->ref_count++;
1741 VM_OBJ_RES_INCR(copy_object);
1742
1743 /*
1744 * Does the page exist in the copy?
1745 */
1746 copy_offset = first_offset - copy_object->shadow_offset;
1747 if (copy_object->size <= copy_offset)
1748 /*
1749 * Copy object doesn't cover this page -- do nothing.
1750 */
1751 ;
1752 else if ((copy_m =
1753 vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1754 /* Page currently exists in the copy object */
1755 if (copy_m->busy) {
1756 /*
1757 * If the page is being brought
1758 * in, wait for it and then retry.
1759 */
1760 RELEASE_PAGE(m);
1761 /* take an extra ref so object won't die */
1762 assert(copy_object->ref_count > 0);
1763 copy_object->ref_count++;
1764 vm_object_res_reference(copy_object);
1765 vm_object_unlock(copy_object);
1766 vm_fault_cleanup(object, first_m);
1767 counter(c_vm_fault_page_block_backoff_kernel++);
1768 vm_object_lock(copy_object);
1769 assert(copy_object->ref_count > 0);
1770 VM_OBJ_RES_DECR(copy_object);
1771 copy_object->ref_count--;
1772 assert(copy_object->ref_count > 0);
1773 copy_m = vm_page_lookup(copy_object, copy_offset);
1774 /*
1775 * ENCRYPTED SWAP:
1776 * it's OK if the "copy_m" page is encrypted,
1777 * because we're not moving it nor handling its
1778 * contents.
1779 */
1780 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1781 PAGE_ASSERT_WAIT(copy_m, interruptible);
1782 vm_object_unlock(copy_object);
1783 wait_result = thread_block(THREAD_CONTINUE_NULL);
1784 vm_object_deallocate(copy_object);
1785 goto backoff;
1786 } else {
1787 vm_object_unlock(copy_object);
1788 vm_object_deallocate(copy_object);
1789 thread_interrupt_level(interruptible_state);
1790 return VM_FAULT_RETRY;
1791 }
1792 }
1793 }
1794 else if (!PAGED_OUT(copy_object, copy_offset)) {
1795 /*
1796 * If PAGED_OUT is TRUE, then the page used to exist
1797 * in the copy-object, and has already been paged out.
1798 * We don't need to repeat this. If PAGED_OUT is
1799 * FALSE, then either we don't know (!pager_created,
1800 * for example) or it hasn't been paged out.
1801 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1802 * We must copy the page to the copy object.
1803 */
1804
1805 /*
 1806  *                       * Are we protecting the system from
 1807  *                       * backing store exhaustion?  If so,
 1808  *                       * sleep unless we are privileged.
1809 */
1810
1811 if(vm_backing_store_low) {
1812 if(!(current_task()->priv_flags
1813 & VM_BACKING_STORE_PRIV)) {
1814 assert_wait((event_t)
1815 &vm_backing_store_low,
1816 THREAD_UNINT);
1817 RELEASE_PAGE(m);
1818 VM_OBJ_RES_DECR(copy_object);
1819 copy_object->ref_count--;
1820 assert(copy_object->ref_count > 0);
1821 vm_object_unlock(copy_object);
1822 vm_fault_cleanup(object, first_m);
1823 thread_block(THREAD_CONTINUE_NULL);
1824 thread_interrupt_level(
1825 interruptible_state);
1826 return(VM_FAULT_RETRY);
1827 }
1828 }
1829
1830 /*
1831 * Allocate a page for the copy
1832 */
1833 copy_m = vm_page_alloc(copy_object, copy_offset);
1834 if (copy_m == VM_PAGE_NULL) {
1835 RELEASE_PAGE(m);
1836 VM_OBJ_RES_DECR(copy_object);
1837 copy_object->ref_count--;
1838 assert(copy_object->ref_count > 0);
1839 vm_object_unlock(copy_object);
1840 vm_fault_cleanup(object, first_m);
1841 thread_interrupt_level(interruptible_state);
1842 return(VM_FAULT_MEMORY_SHORTAGE);
1843 }
1844
1845 /*
1846 * Must copy page into copy-object.
1847 */
1848
1849 vm_page_copy(m, copy_m);
1850
1851 /*
1852 * If the old page was in use by any users
1853 * of the copy-object, it must be removed
1854 * from all pmaps. (We can't know which
1855 * pmaps use it.)
1856 */
1857
1858 vm_page_lock_queues();
1859 assert(!m->cleaning);
1860 pmap_disconnect(m->phys_page);
1861 copy_m->dirty = TRUE;
1862 vm_page_unlock_queues();
1863
1864 /*
1865 * If there's a pager, then immediately
1866 * page out this page, using the "initialize"
1867 * option. Else, we use the copy.
1868 */
1869
1870 if
1871 #if MACH_PAGEMAP
1872 ((!copy_object->pager_created) ||
1873 vm_external_state_get(
1874 copy_object->existence_map, copy_offset)
1875 == VM_EXTERNAL_STATE_ABSENT)
1876 #else
1877 (!copy_object->pager_created)
1878 #endif
1879 {
1880 vm_page_lock_queues();
1881 vm_page_activate(copy_m);
1882 vm_page_unlock_queues();
1883 PAGE_WAKEUP_DONE(copy_m);
1884 }
1885 else {
1886 assert(copy_m->busy == TRUE);
1887
1888 /*
1889 * The page is already ready for pageout:
1890 * not on pageout queues and busy.
1891 * Unlock everything except the
1892 * copy_object itself.
1893 */
1894
1895 vm_object_unlock(object);
1896
1897 /*
1898 * Write the page to the copy-object,
1899 * flushing it from the kernel.
1900 */
1901
1902 vm_pageout_initialize_page(copy_m);
1903
1904 /*
1905 * Since the pageout may have
1906 * temporarily dropped the
1907 * copy_object's lock, we
1908 * check whether we'll have
1909 * to deallocate the hard way.
1910 */
1911
1912 if ((copy_object->shadow != object) ||
1913 (copy_object->ref_count == 1)) {
1914 vm_object_unlock(copy_object);
1915 vm_object_deallocate(copy_object);
1916 vm_object_lock(object);
1917 continue;
1918 }
1919
1920 /*
1921 * Pick back up the old object's
1922 * lock. [It is safe to do so,
1923 * since it must be deeper in the
1924 * object tree.]
1925 */
1926
1927 vm_object_lock(object);
1928 }
1929
1930 /*
1931 * Because we're pushing a page upward
1932 * in the object tree, we must restart
1933 * any faults that are waiting here.
1934 * [Note that this is an expansion of
1935 * PAGE_WAKEUP that uses the THREAD_RESTART
1936 * wait result]. Can't turn off the page's
1937 * busy bit because we're not done with it.
1938 */
1939
1940 if (m->wanted) {
1941 m->wanted = FALSE;
1942 thread_wakeup_with_result((event_t) m,
1943 THREAD_RESTART);
1944 }
1945 }
1946
1947 /*
1948 * The reference count on copy_object must be
1949 * at least 2: one for our extra reference,
1950 * and at least one from the outside world
1951 * (we checked that when we last locked
1952 * copy_object).
1953 */
1954 copy_object->ref_count--;
1955 assert(copy_object->ref_count > 0);
1956 VM_OBJ_RES_DECR(copy_object);
1957 vm_object_unlock(copy_object);
1958
1959 break;
1960 }
1961
1962 *result_page = m;
1963 *top_page = first_m;
1964
1965 XPR(XPR_VM_FAULT,
1966 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1967 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1968 /*
1969 * If the page can be written, assume that it will be.
1970 * [Earlier, we restricted the permission to allow write
1971 * access only if the fault so required, so we don't
1972 * mark read-only data as dirty.]
1973 */
1974
1975
1976 if(m != VM_PAGE_NULL) {
1977 #if !VM_FAULT_STATIC_CONFIG
1978 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1979 m->dirty = TRUE;
1980 #endif
1981 if (vm_page_deactivate_behind)
1982 vm_fault_deactivate_behind(object, offset, behavior);
1983 } else {
1984 vm_object_unlock(object);
1985 }
1986 thread_interrupt_level(interruptible_state);
1987
1988 #if TRACEFAULTPAGE
1989 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1990 #endif
1991 return(VM_FAULT_SUCCESS);
1992
1993 #if 0
1994 block_and_backoff:
1995 vm_fault_cleanup(object, first_m);
1996
1997 counter(c_vm_fault_page_block_backoff_kernel++);
1998 thread_block(THREAD_CONTINUE_NULL);
1999 #endif
2000
2001 backoff:
2002 thread_interrupt_level(interruptible_state);
2003 if (wait_result == THREAD_INTERRUPTED)
2004 return VM_FAULT_INTERRUPTED;
2005 return VM_FAULT_RETRY;
2006
2007 #undef RELEASE_PAGE
2008 }
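/*
 * Note on the success path above: *result_page comes back busy
 * with its object locked, and *top_page (if any) is a placeholder
 * that the caller must release; the callers below finish with
 * PAGE_WAKEUP_DONE() on the page and vm_fault_cleanup() on the
 * object/top_page pair.
 */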
2009
2010 /*
2011 * Routine: vm_fault_tws_insert
2012 * Purpose:
2013 * Add fault information to the task working set.
2014 * Implementation:
2015 * We always insert the base object/offset pair
2016 * rather than the actual object/offset.
2017 * Assumptions:
2018 * Map and real_map locked.
2019 * Object locked and referenced.
2020 * Returns:
2021 * TRUE if the startup file should be written.
2022 * The object is returned locked and still referenced,
2023 * but the object lock may be dropped temporarily.
2024 */
2025 static boolean_t
2026 vm_fault_tws_insert(
2027 vm_map_t map,
2028 vm_map_t real_map,
2029 vm_map_offset_t vaddr,
2030 vm_object_t object,
2031 vm_object_offset_t offset)
2032 {
2033 tws_hash_line_t line;
2034 task_t task;
2035 kern_return_t kr;
2036 boolean_t result = FALSE;
2037
2038 /* Avoid possible map lock deadlock issues */
2039 if (map == kernel_map || map == kalloc_map ||
2040 real_map == kernel_map || real_map == kalloc_map)
2041 return result;
2042
2043 task = current_task();
2044 if (task->dynamic_working_set != 0) {
2045 vm_object_t base_object;
2046 vm_object_t base_shadow;
2047 vm_object_offset_t base_offset;
2048 base_object = object;
2049 base_offset = offset;
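 /*
 * Walk to the bottom of the shadow chain, accumulating
 * shadow_offset at each level, so the working set records
 * the stable base object/offset rather than a transient
 * copy-on-write shadow.
 */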
2050 while ((base_shadow = base_object->shadow)) {
2051 vm_object_lock(base_shadow);
2052 vm_object_unlock(base_object);
2053 base_offset +=
2054 base_object->shadow_offset;
2055 base_object = base_shadow;
2056 }
2057 kr = tws_lookup(
2058 task->dynamic_working_set,
2059 base_offset, base_object,
2060 &line);
2061 if (kr == KERN_OPERATION_TIMED_OUT){
2062 result = TRUE;
2063 if (base_object != object) {
2064 vm_object_unlock(base_object);
2065 vm_object_lock(object);
2066 }
2067 } else if (kr != KERN_SUCCESS) {
2068 if(base_object != object)
2069 vm_object_reference_locked(base_object);
2070 kr = tws_insert(
2071 task->dynamic_working_set,
2072 base_offset, base_object,
2073 vaddr, real_map);
2074 if(base_object != object) {
2075 vm_object_unlock(base_object);
2076 vm_object_deallocate(base_object);
2077 }
2078 if(kr == KERN_NO_SPACE) {
2079 if (base_object == object)
2080 vm_object_unlock(object);
2081 tws_expand_working_set(
2082 task->dynamic_working_set,
2083 TWS_HASH_LINE_COUNT,
2084 FALSE);
2085 if (base_object == object)
2086 vm_object_lock(object);
2087 } else if(kr == KERN_OPERATION_TIMED_OUT) {
2088 result = TRUE;
2089 }
2090 if(base_object != object)
2091 vm_object_lock(object);
2092 } else if (base_object != object) {
2093 vm_object_unlock(base_object);
2094 vm_object_lock(object);
2095 }
2096 }
2097 return result;
2098 }
2099
2100 /*
2101 * Routine: vm_fault
2102 * Purpose:
2103 * Handle page faults, including pseudo-faults
2104 * used to change the wiring status of pages.
2105 * Returns:
2106 * Explicit continuations have been removed.
2107 * Implementation:
2108 * vm_fault and vm_fault_page save mucho state
2109 * in the moral equivalent of a closure. The state
2110 * structure is allocated when first entering vm_fault
2111 * and deallocated when leaving vm_fault.
2112 */
2113
2114 extern int _map_enter_debug;
2115
2116 kern_return_t
2117 vm_fault(
2118 vm_map_t map,
2119 vm_map_offset_t vaddr,
2120 vm_prot_t fault_type,
2121 boolean_t change_wiring,
2122 int interruptible,
2123 pmap_t caller_pmap,
2124 vm_map_offset_t caller_pmap_addr)
2125 {
2126 vm_map_version_t version; /* Map version for verification */
2127 boolean_t wired; /* Should mapping be wired down? */
2128 vm_object_t object; /* Top-level object */
2129 vm_object_offset_t offset; /* Top-level offset */
2130 vm_prot_t prot; /* Protection for mapping */
2131 vm_behavior_t behavior; /* Expected paging behavior */
2132 vm_map_offset_t lo_offset, hi_offset;
2133 vm_object_t old_copy_object; /* Saved copy object */
2134 vm_page_t result_page; /* Result of vm_fault_page */
2135 vm_page_t top_page; /* Placeholder page */
2136 kern_return_t kr;
2137
2138 register
2139 vm_page_t m; /* Fast access to result_page */
2140 kern_return_t error_code = 0; /* page error reasons */
2141 register
2142 vm_object_t cur_object;
2143 register
2144 vm_object_offset_t cur_offset;
2145 vm_page_t cur_m;
2146 vm_object_t new_object;
2147 int type_of_fault;
2148 vm_map_t real_map = map;
2149 vm_map_t original_map = map;
2150 pmap_t pmap = NULL;
2151 boolean_t interruptible_state;
2152 unsigned int cache_attr;
2153 int write_startup_file = 0;
2154 boolean_t need_activation;
2155 vm_prot_t full_fault_type;
2156
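 /*
 * Fault handling may block (page waits, pager activity), so
 * refuse to handle a fault taken with preemption disabled.
 */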
2157 if (get_preemption_level() != 0)
2158 return (KERN_FAILURE);
2159
2160 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2161 vaddr,
2162 0,
2163 0,
2164 0,
2165 0);
2166
2167 /* at present we do not fully check for execute permission */
2168 /* we generally treat it as read except in certain device */
2169 /* memory settings */
2170 full_fault_type = fault_type;
2171 if(fault_type & VM_PROT_EXECUTE) {
2172 fault_type &= ~VM_PROT_EXECUTE;
2173 fault_type |= VM_PROT_READ;
2174 }
2175
2176 interruptible_state = thread_interrupt_level(interruptible);
2177
2178 /*
2179 * Assume we will hit a page in the cache;
2180 * otherwise, explicitly override with
2181 * the real fault type once we determine it.
2182 */
2183 type_of_fault = DBG_CACHE_HIT_FAULT;
2184
2185 VM_STAT(faults++);
2186 current_task()->faults++;
2187
2188 RetryFault: ;
2189
2190 /*
2191 * Find the backing store object and offset into
2192 * it to begin the search.
2193 */
2194 map = original_map;
2195 vm_map_lock_read(map);
2196 kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2197 &object, &offset,
2198 &prot, &wired,
2199 &behavior, &lo_offset, &hi_offset, &real_map);
2200
2201 //if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr);
2202
2203 pmap = real_map->pmap;
2204
2205 if (kr != KERN_SUCCESS) {
2206 vm_map_unlock_read(map);
2207 goto done;
2208 }
2209
2210 /*
2211 * If the page is wired, we must fault for the current protection
2212 * value, to avoid further faults.
2213 */
2214
2215 if (wired)
2216 fault_type = prot | VM_PROT_WRITE;
2217
2218 #if VM_FAULT_CLASSIFY
2219 /*
2220 * Temporary data gathering code
2221 */
2222 vm_fault_classify(object, offset, fault_type);
2223 #endif
2224 /*
2225 * Fast fault code. The basic idea is to do as much as
2226 * possible while holding the map lock and object locks.
2227 * Busy pages are not used until the object lock has to
2228 * be dropped to do something (copy, zero fill, pmap enter).
2229 * Similarly, paging references aren't acquired until that
2230 * point, and object references aren't used.
2231 *
2232 * If we can figure out what to do
2233 * (zero fill, copy on write, pmap enter) while holding
2234 * the locks, then it gets done. Otherwise, we give up,
2235 * and use the original fault path (which doesn't hold
2236 * the map lock, and relies on busy pages).
2237 * The give up cases include:
2238 * - Have to talk to pager.
2239 * - Page is busy, absent or in error.
2240 * - Pager has locked out desired access.
2241 * - Fault needs to be restarted.
2242 * - Have to push page into copy object.
2243 *
2244 * The code is an infinite loop that moves one level down
2245 * the shadow chain each time. cur_object and cur_offset
2246 * refer to the current object being examined. object and offset
2247 * are the original object from the map. The loop is at the
2248 * top level if and only if object and cur_object are the same.
2249 *
2250 * Invariants: Map lock is held throughout. Lock is held on
2251 * original object and cur_object (if different) when
2252 * continuing or exiting loop.
2253 *
2254 */
2255
2256
2257 /*
2258 * If this page is to be inserted in a copy delay object
2259 * for writing, and if the object has a copy, then the
2260 * copy delay strategy is implemented in the slow fault path.
2261 */
2262 if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2263 object->copy == VM_OBJECT_NULL ||
2264 (fault_type & VM_PROT_WRITE) == 0) {
2265 cur_object = object;
2266 cur_offset = offset;
2267
2268 while (TRUE) {
2269 m = vm_page_lookup(cur_object, cur_offset);
2270 if (m != VM_PAGE_NULL) {
2271 if (m->busy) {
2272 wait_result_t result;
2273
2274 if (object != cur_object)
2275 vm_object_unlock(object);
2276
2277 vm_map_unlock_read(map);
2278 if (real_map != map)
2279 vm_map_unlock(real_map);
2280
2281 #if !VM_FAULT_STATIC_CONFIG
2282 if (!vm_fault_interruptible)
2283 interruptible = THREAD_UNINT;
2284 #endif
2285 result = PAGE_ASSERT_WAIT(m, interruptible);
2286
2287 vm_object_unlock(cur_object);
2288
2289 if (result == THREAD_WAITING) {
2290 result = thread_block(THREAD_CONTINUE_NULL);
2291
2292 counter(c_vm_fault_page_block_busy_kernel++);
2293 }
2294 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2295 goto RetryFault;
2296
2297 kr = KERN_ABORTED;
2298 goto done;
2299 }
2300 if (m->unusual && (m->error || m->restart || m->private
2301 || m->absent || (fault_type & m->page_lock))) {
2302
2303 /*
2304 * Unusual case. Give up.
2305 */
2306 break;
2307 }
2308
2309 if (m->encrypted) {
2310 /*
2311 * ENCRYPTED SWAP:
2312 * We've soft-faulted (because it's not in the page
2313 * table) on an encrypted page.
2314 * Keep the page "busy" so that no one messes with
2315 * it during the decryption.
2316 * Release the extra locks we're holding, keep only
2317 * the page's VM object lock.
2318 */
2319 m->busy = TRUE;
2320 if (object != cur_object) {
2321 vm_object_unlock(object);
2322 }
2323 vm_map_unlock_read(map);
2324 if (real_map != map)
2325 vm_map_unlock(real_map);
2326
2327 vm_page_decrypt(m, 0);
2328
2329 assert(m->busy);
2330 PAGE_WAKEUP_DONE(m);
2331 vm_object_unlock(m->object);
2332
2333 /*
2334 * Retry from the top, in case anything
2335 * changed while we were decrypting...
2336 */
2337 goto RetryFault;
2338 }
2339 ASSERT_PAGE_DECRYPTED(m);
2340
2341 /*
2342 * Two cases of map in faults:
2343 * - At top level w/o copy object.
2344 * - Read fault anywhere.
2345 * --> must disallow write.
2346 */
2347
2348 if (object == cur_object &&
2349 object->copy == VM_OBJECT_NULL)
2350 goto FastMapInFault;
2351
2352 if ((fault_type & VM_PROT_WRITE) == 0) {
2353 boolean_t sequential;
2354
2355 prot &= ~VM_PROT_WRITE;
2356
2357 /*
2358 * Set up to map the page ...
2359 * mark the page busy, drop
2360 * locks and take a paging reference
2361 * on the object with the page.
2362 */
2363
2364 if (object != cur_object) {
2365 vm_object_unlock(object);
2366 object = cur_object;
2367 }
2368 FastMapInFault:
2369 m->busy = TRUE;
2370
2371 vm_object_paging_begin(object);
2372
2373 FastPmapEnter:
2374 /*
2375 * Check a couple of global reasons to
2376 * be conservative about write access.
2377 * Then do the pmap_enter.
2378 */
2379 #if !VM_FAULT_STATIC_CONFIG
2380 if (vm_fault_dirty_handling
2381 #if MACH_KDB
2382 || db_watchpoint_list
2383 #endif
2384 && (fault_type & VM_PROT_WRITE) == 0)
2385 prot &= ~VM_PROT_WRITE;
2386 #else /* STATIC_CONFIG */
2387 #if MACH_KDB
2388 if (db_watchpoint_list
2389 && (fault_type & VM_PROT_WRITE) == 0)
2390 prot &= ~VM_PROT_WRITE;
2391 #endif /* MACH_KDB */
2392 #endif /* STATIC_CONFIG */
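 /*
 * Pick up the cache attributes (WIMG bits) from the page's
 * object so PMAP_ENTER maps it with the appropriate caching
 * policy (e.g. cache-inhibited for device memory).
 */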
2393 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2394
2395 sequential = FALSE;
2396 need_activation = FALSE;
2397
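 /*
 * no_isync means the page's data has not yet been
 * synchronized with the instruction cache; do that once,
 * on first fault-in, before entering the mapping.
 */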
2398 if (m->no_isync == TRUE) {
2399 m->no_isync = FALSE;
2400 pmap_sync_page_data_phys(m->phys_page);
2401
2402 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2403 /*
2404 * found it in the cache, but this
2405 * is the first fault-in of the page (no_isync == TRUE)
2406 * so it must have come in as part of
2407 * a cluster... account 1 pagein against it
2408 */
2409 VM_STAT(pageins++);
2410 current_task()->pageins++;
2411 type_of_fault = DBG_PAGEIN_FAULT;
2412 sequential = TRUE;
2413 }
2414 if (m->clustered)
2415 need_activation = TRUE;
2416
2417 } else if (cache_attr != VM_WIMG_DEFAULT) {
2418 pmap_sync_page_attributes_phys(m->phys_page);
2419 }
2420
2421 if(caller_pmap) {
2422 PMAP_ENTER(caller_pmap,
2423 caller_pmap_addr, m,
2424 prot, cache_attr, wired);
2425 } else {
2426 PMAP_ENTER(pmap, vaddr, m,
2427 prot, cache_attr, wired);
2428 }
2429
2430 /*
2431 * Hold queues lock to manipulate
2432 * the page queues. Change wiring
2433 * case is obvious. In soft ref bits
2434 * case activate page only if it fell
2435 * off paging queues, otherwise just
2436 * activate it if it's inactive.
2437 *
2438 * NOTE: original vm_fault code will
2439 * move active page to back of active
2440 * queue. This code doesn't.
2441 */
2442 vm_page_lock_queues();
2443
2444 if (m->clustered) {
2445 vm_pagein_cluster_used++;
2446 m->clustered = FALSE;
2447 }
2448 m->reference = TRUE;
2449
2450 if (change_wiring) {
2451 if (wired)
2452 vm_page_wire(m);
2453 else
2454 vm_page_unwire(m);
2455 }
2456 #if VM_FAULT_STATIC_CONFIG
2457 else {
2458 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
2459 vm_page_activate(m);
2460 }
2461 #else
2462 else if (software_reference_bits) {
2463 if (!m->active && !m->inactive)
2464 vm_page_activate(m);
2465 }
2466 else if (!m->active) {
2467 vm_page_activate(m);
2468 }
2469 #endif
2470 vm_page_unlock_queues();
2471
2472 /*
2473 * That's it, clean up and return.
2474 */
2475 PAGE_WAKEUP_DONE(m);
2476
2477 sequential = (sequential && vm_page_deactivate_behind) ?
2478 vm_fault_deactivate_behind(object, cur_offset, behavior) :
2479 FALSE;
2480
2481 /*
2482 * Add non-sequential pages to the working set.
2483 * The sequential pages will be brought in through
2484 * normal clustering behavior.
2485 */
2486 if (!sequential && !object->private) {
2487 write_startup_file =
2488 vm_fault_tws_insert(map, real_map, vaddr,
2489 object, cur_offset);
2490 }
2491
2492 vm_object_paging_end(object);
2493 vm_object_unlock(object);
2494
2495 vm_map_unlock_read(map);
2496 if(real_map != map)
2497 vm_map_unlock(real_map);
2498
2499 if(write_startup_file)
2500 tws_send_startup_info(current_task());
2501
2502 thread_interrupt_level(interruptible_state);
2503
2504
2505 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2506 vaddr,
2507 type_of_fault & 0xff,
2508 KERN_SUCCESS,
2509 type_of_fault >> 8,
2510 0);
2511
2512 return KERN_SUCCESS;
2513 }
2514
2515 /*
2516 * Copy on write fault. If objects match, then
2517 * object->copy must not be NULL (else control
2518 * would be in previous code block), and we
2519 * have a potential push into the copy object
2520 * with which we won't cope here.
2521 */
2522
2523 if (cur_object == object)
2524 break;
2525 /*
2526 * This is now a shadow based copy on write
2527 * fault -- it requires a copy up the shadow
2528 * chain.
2529 *
2530 * Allocate a page in the original top level
2531 * object. Give up if allocate fails. Also
2532 * need to remember current page, as it's the
2533 * source of the copy.
2534 */
2535 cur_m = m;
2536 m = vm_page_grab();
2537 if (m == VM_PAGE_NULL) {
2538 break;
2539 }
2540 /*
2541 * Now do the copy. Mark the source busy
2542 * and take out paging references on both
2543 * objects.
2544 *
2545 * NOTE: This code holds the map lock across
2546 * the page copy.
2547 */
2548
2549 cur_m->busy = TRUE;
2550 vm_page_copy(cur_m, m);
2551 vm_page_insert(m, object, offset);
2552
2553 vm_object_paging_begin(cur_object);
2554 vm_object_paging_begin(object);
2555
2556 type_of_fault = DBG_COW_FAULT;
2557 VM_STAT(cow_faults++);
2558 current_task()->cow_faults++;
2559
2560 /*
2561 * Now cope with the source page and object.
2562 * If the top object has a ref count of 1
2563 * then no other map can access it, and hence
2564 * it's not necessary to do the pmap_disconnect.
2565 */
2566
2567 vm_page_lock_queues();
2568 vm_page_deactivate(cur_m);
2569 m->dirty = TRUE;
2570 pmap_disconnect(cur_m->phys_page);
2571 vm_page_unlock_queues();
2572
2573 PAGE_WAKEUP_DONE(cur_m);
2574 vm_object_paging_end(cur_object);
2575 vm_object_unlock(cur_object);
2576
2577 /*
2578 * Slight hack: call vm_object_collapse()
2579 * and then reuse the common map-in code.
2580 * Note that the object lock was taken above.
2581 */
2582
2583 vm_object_paging_end(object);
2584 vm_object_collapse(object, offset);
2585 vm_object_paging_begin(object);
2586
2587 goto FastPmapEnter;
2588 }
2589 else {
2590
2591 /*
2592 * No page at cur_object, cur_offset
2593 */
2594
2595 if (cur_object->pager_created) {
2596
2597 /*
2598 * Have to talk to the pager. Give up.
2599 */
2600 break;
2601 }
2602
2603
2604 if (cur_object->shadow == VM_OBJECT_NULL) {
2605
2606 if (cur_object->shadow_severed) {
2607 vm_object_paging_end(object);
2608 vm_object_unlock(object);
2609 vm_map_unlock_read(map);
2610 if(real_map != map)
2611 vm_map_unlock(real_map);
2612
2613 if(write_startup_file)
2614 tws_send_startup_info(
2615 current_task());
2616
2617 thread_interrupt_level(interruptible_state);
2618
2619 return KERN_MEMORY_ERROR;
2620 }
2621
2622 /*
2623 * Zero fill fault. Page gets
2624 * filled in top object. Insert
2625 * page, then drop any lower lock.
2626 * Give up if no page.
2627 */
2628 if (VM_PAGE_THROTTLED()) {
2629 break;
2630 }
2631
2632 /*
2633 * Are we protecting the system from
2634 * backing store exhaustion? If so,
2635 * sleep unless we are privileged.
2636 */
2637 if(vm_backing_store_low) {
2638 if(!(current_task()->priv_flags
2639 & VM_BACKING_STORE_PRIV))
2640 break;
2641 }
2642 m = vm_page_alloc(object, offset);
2643 if (m == VM_PAGE_NULL) {
2644 break;
2645 }
2646 /*
2647 * This is a zero-fill or initial fill
2648 * page fault. As such, we consider it
2649 * undefined with respect to instruction
2650 * execution; i.e., it is the responsibility
2651 * of higher layers to call for an instruction
2652 * sync after changing the contents and before
2653 * sending a program into this area. We
2654 * choose this approach for performance
2655 */
2656
2657 m->no_isync = FALSE;
2658
2659 if (cur_object != object)
2660 vm_object_unlock(cur_object);
2661
2662 vm_object_paging_begin(object);
2663 vm_object_unlock(object);
2664
2665 /*
2666 * Now zero fill the page and map it.
2667 * The page is probably going to
2668 * be written soon, so don't bother
2669 * to clear the modified bit
2670 *
2671 * NOTE: This code holds the map
2672 * lock across the zero fill.
2673 */
2674
2675 if (!map->no_zero_fill) {
2676 vm_page_zero_fill(m);
2677 type_of_fault = DBG_ZERO_FILL_FAULT;
2678 VM_STAT(zero_fill_count++);
2679 }
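 /*
 * Queue the freshly zero-filled page as inactive. Pages of
 * large (> 2MB) objects are tagged zero_fill and put on the
 * dedicated vm_page_queue_zf; the ticket counter appears to
 * batch these pages into groups of VM_PAGE_TICKETS_IN_ROLL
 * so the pageout code can age them in order.
 */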
2680 vm_page_lock_queues();
2681 VM_PAGE_QUEUES_REMOVE(m);
2682
2683 m->page_ticket = vm_page_ticket;
2684 assert(!m->laundry);
2685 assert(m->object != kernel_object);
2686 assert(m->pageq.next == NULL &&
2687 m->pageq.prev == NULL);
2688 if(m->object->size > 0x200000) {
2689 m->zero_fill = TRUE;
2690 /* depends on the queues lock */
2691 vm_zf_count += 1;
2692 queue_enter(&vm_page_queue_zf,
2693 m, vm_page_t, pageq);
2694 } else {
2695 queue_enter(
2696 &vm_page_queue_inactive,
2697 m, vm_page_t, pageq);
2698 }
2699 vm_page_ticket_roll++;
2700 if(vm_page_ticket_roll ==
2701 VM_PAGE_TICKETS_IN_ROLL) {
2702 vm_page_ticket_roll = 0;
2703 if(vm_page_ticket ==
2704 VM_PAGE_TICKET_ROLL_IDS)
2705 vm_page_ticket= 0;
2706 else
2707 vm_page_ticket++;
2708 }
2709
2710 m->inactive = TRUE;
2711 vm_page_inactive_count++;
2712 vm_page_unlock_queues();
2713 vm_object_lock(object);
2714
2715 goto FastPmapEnter;
2716 }
2717
2718 /*
2719 * On to the next level
2720 */
2721
2722 cur_offset += cur_object->shadow_offset;
2723 new_object = cur_object->shadow;
2724 vm_object_lock(new_object);
2725 if (cur_object != object)
2726 vm_object_unlock(cur_object);
2727 cur_object = new_object;
2728
2729 continue;
2730 }
2731 }
2732
2733 /*
2734 * Cleanup from fast fault failure. Drop any object
2735 * lock other than original and drop map lock.
2736 */
2737
2738 if (object != cur_object)
2739 vm_object_unlock(cur_object);
2740 }
2741 vm_map_unlock_read(map);
2742
2743 if(real_map != map)
2744 vm_map_unlock(real_map);
2745
2746 /*
2747 * Make a reference to this object to
2748 * prevent its disposal while we are messing with
2749 * it. Once we have the reference, the map is free
2750 * to be diddled. Since objects reference their
2751 * shadows (and copies), they will stay around as well.
2752 */
2753
2754 assert(object->ref_count > 0);
2755 object->ref_count++;
2756 vm_object_res_reference(object);
2757 vm_object_paging_begin(object);
2758
2759 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2760
2761 if (!object->private) {
2762 write_startup_file =
2763 vm_fault_tws_insert(map, real_map, vaddr, object, offset);
2764 }
2765
2766 kr = vm_fault_page(object, offset, fault_type,
2767 (change_wiring && !wired),
2768 interruptible,
2769 lo_offset, hi_offset, behavior,
2770 &prot, &result_page, &top_page,
2771 &type_of_fault,
2772 &error_code, map->no_zero_fill, FALSE, map, vaddr);
2773
2774 /*
2775 * If we didn't succeed, lose the object reference immediately.
2776 */
2777
2778 if (kr != VM_FAULT_SUCCESS)
2779 vm_object_deallocate(object);
2780
2781 /*
2782 * See why we failed, and take corrective action.
2783 */
2784
2785 switch (kr) {
2786 case VM_FAULT_SUCCESS:
2787 break;
2788 case VM_FAULT_MEMORY_SHORTAGE:
2789 if (vm_page_wait((change_wiring) ?
2790 THREAD_UNINT :
2791 THREAD_ABORTSAFE))
2792 goto RetryFault;
2793 /* fall thru */
2794 case VM_FAULT_INTERRUPTED:
2795 kr = KERN_ABORTED;
2796 goto done;
2797 case VM_FAULT_RETRY:
2798 goto RetryFault;
2799 case VM_FAULT_FICTITIOUS_SHORTAGE:
2800 vm_page_more_fictitious();
2801 goto RetryFault;
2802 case VM_FAULT_MEMORY_ERROR:
2803 if (error_code)
2804 kr = error_code;
2805 else
2806 kr = KERN_MEMORY_ERROR;
2807 goto done;
2808 }
2809
2810 m = result_page;
2811
2812 if(m != VM_PAGE_NULL) {
2813 assert((change_wiring && !wired) ?
2814 (top_page == VM_PAGE_NULL) :
2815 ((top_page == VM_PAGE_NULL) == (m->object == object)));
2816 }
2817
2818 /*
2819 * How to clean up the result of vm_fault_page. This
2820 * happens whether the mapping is entered or not.
2821 */
2822
2823 #define UNLOCK_AND_DEALLOCATE \
2824 MACRO_BEGIN \
2825 vm_fault_cleanup(m->object, top_page); \
2826 vm_object_deallocate(object); \
2827 MACRO_END
2828
2829 /*
2830 * What to do with the resulting page from vm_fault_page
2831 * if it doesn't get entered into the physical map:
2832 */
2833
2834 #define RELEASE_PAGE(m) \
2835 MACRO_BEGIN \
2836 PAGE_WAKEUP_DONE(m); \
2837 vm_page_lock_queues(); \
2838 if (!m->active && !m->inactive) \
2839 vm_page_activate(m); \
2840 vm_page_unlock_queues(); \
2841 MACRO_END
2842
2843 /*
2844 * We must verify that the maps have not changed
2845 * since our last lookup.
2846 */
2847
2848 if(m != VM_PAGE_NULL) {
2849 old_copy_object = m->object->copy;
2850 vm_object_unlock(m->object);
2851 } else {
2852 old_copy_object = VM_OBJECT_NULL;
2853 }
2854 if ((map != original_map) || !vm_map_verify(map, &version)) {
2855 vm_object_t retry_object;
2856 vm_object_offset_t retry_offset;
2857 vm_prot_t retry_prot;
2858
2859 /*
2860 * To avoid trying to write_lock the map while another
2861 * thread has it read_locked (in vm_map_pageable), we
2862 * do not try for write permission. If the page is
2863 * still writable, we will get write permission. If it
2864 * is not, or has been marked needs_copy, we enter the
2865 * mapping without write permission, and will merely
2866 * take another fault.
2867 */
2868 map = original_map;
2869 vm_map_lock_read(map);
2870 kr = vm_map_lookup_locked(&map, vaddr,
2871 fault_type & ~VM_PROT_WRITE, &version,
2872 &retry_object, &retry_offset, &retry_prot,
2873 &wired, &behavior, &lo_offset, &hi_offset,
2874 &real_map);
2875 pmap = real_map->pmap;
2876
2877 if (kr != KERN_SUCCESS) {
2878 vm_map_unlock_read(map);
2879 if(m != VM_PAGE_NULL) {
2880 vm_object_lock(m->object);
2881 RELEASE_PAGE(m);
2882 UNLOCK_AND_DEALLOCATE;
2883 } else {
2884 vm_object_deallocate(object);
2885 }
2886 goto done;
2887 }
2888
2889 vm_object_unlock(retry_object);
2890 if(m != VM_PAGE_NULL) {
2891 vm_object_lock(m->object);
2892 } else {
2893 vm_object_lock(object);
2894 }
2895
2896 if ((retry_object != object) ||
2897 (retry_offset != offset)) {
2898 vm_map_unlock_read(map);
2899 if(real_map != map)
2900 vm_map_unlock(real_map);
2901 if(m != VM_PAGE_NULL) {
2902 RELEASE_PAGE(m);
2903 UNLOCK_AND_DEALLOCATE;
2904 } else {
2905 vm_object_deallocate(object);
2906 }
2907 goto RetryFault;
2908 }
2909
2910 /*
2911 * Check whether the protection has changed or the object
2912 * has been copied while we left the map unlocked.
2913 */
2914 prot &= retry_prot;
2915 if(m != VM_PAGE_NULL) {
2916 vm_object_unlock(m->object);
2917 } else {
2918 vm_object_unlock(object);
2919 }
2920 }
2921 if(m != VM_PAGE_NULL) {
2922 vm_object_lock(m->object);
2923 } else {
2924 vm_object_lock(object);
2925 }
2926
2927 /*
2928 * If the copy object changed while the top-level object
2929 * was unlocked, then we must take away write permission.
2930 */
2931
2932 if(m != VM_PAGE_NULL) {
2933 if (m->object->copy != old_copy_object)
2934 prot &= ~VM_PROT_WRITE;
2935 }
2936
2937 /*
2938 * If we want to wire down this page, but no longer have
2939 * adequate permissions, we must start all over.
2940 */
2941
2942 if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2943 vm_map_verify_done(map, &version);
2944 if(real_map != map)
2945 vm_map_unlock(real_map);
2946 if(m != VM_PAGE_NULL) {
2947 RELEASE_PAGE(m);
2948 UNLOCK_AND_DEALLOCATE;
2949 } else {
2950 vm_object_deallocate(object);
2951 }
2952 goto RetryFault;
2953 }
2954
2955 /*
2956 * Put this page into the physical map.
2957 * We had to do the unlock above because pmap_enter
2958 * may cause other faults. The page may be on
2959 * the pageout queues. If the pageout daemon comes
2960 * across the page, it will remove it from the queues.
2961 */
2962 need_activation = FALSE;
2963
2964 if (m != VM_PAGE_NULL) {
2965 if (m->no_isync == TRUE) {
2966 pmap_sync_page_data_phys(m->phys_page);
2967
2968 if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2969 /*
2970 * found it in the cache, but this
2971 * is the first fault-in of the page (no_isync == TRUE)
2972 * so it must have come in as part of
2973 * a cluster... account 1 pagein against it
2974 */
2975 VM_STAT(pageins++);
2976 current_task()->pageins++;
2977
2978 type_of_fault = DBG_PAGEIN_FAULT;
2979 }
2980 if (m->clustered) {
2981 need_activation = TRUE;
2982 }
2983 m->no_isync = FALSE;
2984 }
2985 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2986
2987 if(caller_pmap) {
2988 PMAP_ENTER(caller_pmap,
2989 caller_pmap_addr, m,
2990 prot, cache_attr, wired);
2991 } else {
2992 PMAP_ENTER(pmap, vaddr, m,
2993 prot, cache_attr, wired);
2994 }
2995
2996 /*
2997 * Add working set information for private objects here.
2998 */
2999 if (m->object->private) {
3000 write_startup_file =
3001 vm_fault_tws_insert(map, real_map, vaddr,
3002 m->object, m->offset);
3003 }
3004 } else {
3005
3006 #ifndef i386
3007 vm_map_entry_t entry;
3008 vm_map_offset_t laddr;
3009 vm_map_offset_t ldelta, hdelta;
3010
3011 /*
3012 * do a pmap block mapping from the physical address
3013 * in the object
3014 */
3015
3016 /* While we do not worry about execution protection in */
3017 /* general, certain pages may have instruction execution */
3018 /* disallowed. We will check here, and if not allowed */
3019 /* to execute, we return with a protection failure. */
3020
3021 if((full_fault_type & VM_PROT_EXECUTE) &&
3022 (!pmap_eligible_for_execute((ppnum_t)
3023 (object->shadow_offset >> 12)))) {
3024
3025 vm_map_verify_done(map, &version);
3026 if(real_map != map)
3027 vm_map_unlock(real_map);
3028 vm_fault_cleanup(object, top_page);
3029 vm_object_deallocate(object);
3030 kr = KERN_PROTECTION_FAILURE;
3031 goto done;
3032 }
3033
3034 if(real_map != map) {
3035 vm_map_unlock(real_map);
3036 }
3037 if (original_map != map) {
3038 vm_map_unlock_read(map);
3039 vm_map_lock_read(original_map);
3040 map = original_map;
3041 }
3042 real_map = map;
3043
3044 laddr = vaddr;
3045 hdelta = 0xFFFFF000;
3046 ldelta = 0xFFFFF000;
3047
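 /*
 * ldelta/hdelta bound how far below and above vaddr the block
 * mapping may extend while staying inside the map entry; they
 * start at 0xFFFFF000 and are clamped in the lookup loop below.
 * pmap_map_block() then maps [vaddr - ldelta, vaddr + hdelta)
 * as 4K pages, hence the ">> 12" conversions. For example, a
 * vaddr 0x3000 into a 0x10000-byte entry gives ldelta 0x3000
 * and hdelta 0xD000, i.e. 0x10 pages starting at the entry's
 * base address.
 */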
3048
3049 while(vm_map_lookup_entry(map, laddr, &entry)) {
3050 if(ldelta > (laddr - entry->vme_start))
3051 ldelta = laddr - entry->vme_start;
3052 if(hdelta > (entry->vme_end - laddr))
3053 hdelta = entry->vme_end - laddr;
3054 if(entry->is_sub_map) {
3055
3056 laddr = (laddr - entry->vme_start)
3057 + entry->offset;
3058 vm_map_lock_read(entry->object.sub_map);
3059 if(map != real_map)
3060 vm_map_unlock_read(map);
3061 if(entry->use_pmap) {
3062 vm_map_unlock_read(real_map);
3063 real_map = entry->object.sub_map;
3064 }
3065 map = entry->object.sub_map;
3066
3067 } else {
3068 break;
3069 }
3070 }
3071
3072 if(vm_map_lookup_entry(map, laddr, &entry) &&
3073 (entry->object.vm_object != NULL) &&
3074 (entry->object.vm_object == object)) {
3075
3076
3077 if(caller_pmap) {
3078 /* Set up a block mapped area */
3079 pmap_map_block(caller_pmap,
3080 (addr64_t)(caller_pmap_addr - ldelta),
3081 (((vm_map_offset_t)
3082 (entry->object.vm_object->shadow_offset))
3083 + entry->offset +
3084 (laddr - entry->vme_start)
3085 - ldelta) >> 12,
3086 ((ldelta + hdelta) >> 12), prot,
3087 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3088 } else {
3089 /* Set up a block mapped area */
3090 pmap_map_block(real_map->pmap,
3091 (addr64_t)(vaddr - ldelta),
3092 (((vm_map_offset_t)
3093 (entry->object.vm_object->shadow_offset))
3094 + entry->offset +
3095 (laddr - entry->vme_start) - ldelta) >> 12,
3096 ((ldelta + hdelta) >> 12), prot,
3097 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3098 }
3099 }
3100 #else
3101 #ifdef notyet
3102 if(caller_pmap) {
3103 pmap_enter(caller_pmap, caller_pmap_addr,
3104 object->shadow_offset>>12, prot, 0, TRUE);
3105 } else {
3106 pmap_enter(pmap, vaddr,
3107 object->shadow_offset>>12, prot, 0, TRUE);
3108 }
3109 /* Map it in */
3110 #endif
3111 #endif
3112
3113 }
3114
3115 /*
3116 * If the page is not wired down and isn't already
3117 * on a pageout queue, then put it where the
3118 * pageout daemon can find it.
3119 */
3120 if(m != VM_PAGE_NULL) {
3121 vm_page_lock_queues();
3122
3123 if (m->clustered) {
3124 vm_pagein_cluster_used++;
3125 m->clustered = FALSE;
3126 }
3127 m->reference = TRUE;
3128
3129 if (change_wiring) {
3130 if (wired)
3131 vm_page_wire(m);
3132 else
3133 vm_page_unwire(m);
3134 }
3135 #if VM_FAULT_STATIC_CONFIG
3136 else {
3137 if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active))
3138 vm_page_activate(m);
3139 }
3140 #else
3141 else if (software_reference_bits) {
3142 if (!m->active && !m->inactive)
3143 vm_page_activate(m);
3144 m->reference = TRUE;
3145 } else {
3146 vm_page_activate(m);
3147 }
3148 #endif
3149 vm_page_unlock_queues();
3150 }
3151
3152 /*
3153 * Unlock everything, and return
3154 */
3155
3156 vm_map_verify_done(map, &version);
3157 if(real_map != map)
3158 vm_map_unlock(real_map);
3159 if(m != VM_PAGE_NULL) {
3160 PAGE_WAKEUP_DONE(m);
3161 UNLOCK_AND_DEALLOCATE;
3162 } else {
3163 vm_fault_cleanup(object, top_page);
3164 vm_object_deallocate(object);
3165 }
3166 kr = KERN_SUCCESS;
3167
3168 #undef UNLOCK_AND_DEALLOCATE
3169 #undef RELEASE_PAGE
3170
3171 done:
3172 if(write_startup_file)
3173 tws_send_startup_info(current_task());
3174
3175 thread_interrupt_level(interruptible_state);
3176
3177 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3178 vaddr,
3179 type_of_fault & 0xff,
3180 kr,
3181 type_of_fault >> 8,
3182 0);
3183
3184 return(kr);
3185 }
3186
3187 /*
3188 * vm_fault_wire:
3189 *
3190 * Wire down a range of virtual addresses in a map.
3191 */
3192 kern_return_t
3193 vm_fault_wire(
3194 vm_map_t map,
3195 vm_map_entry_t entry,
3196 pmap_t pmap,
3197 vm_map_offset_t pmap_addr)
3198 {
3199
3200 register vm_map_offset_t va;
3201 register vm_map_offset_t end_addr = entry->vme_end;
3202 register kern_return_t rc;
3203
3204 assert(entry->in_transition);
3205
3206 if ((entry->object.vm_object != NULL) &&
3207 !entry->is_sub_map &&
3208 entry->object.vm_object->phys_contiguous) {
3209 return KERN_SUCCESS;
3210 }
3211
3212 /*
3213 * Inform the physical mapping system that the
3214 * range of addresses may not fault, so that
3215 * page tables and such can be locked down as well.
3216 */
3217
3218 pmap_pageable(pmap, pmap_addr,
3219 pmap_addr + (end_addr - entry->vme_start), FALSE);
3220
3221 /*
3222 * We simulate a fault to get the page and enter it
3223 * in the physical map.
3224 */
3225
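 /*
 * vm_fault_wire_fast() handles the common case without a full
 * map lookup; anything unusual (busy, absent, encrypted, a
 * copy object on a write, ...) makes it fail and we fall back
 * to the regular vm_fault() path with change_wiring == TRUE.
 */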
3226 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3227 if ((rc = vm_fault_wire_fast(
3228 map, va, entry, pmap,
3229 pmap_addr + (va - entry->vme_start)
3230 )) != KERN_SUCCESS) {
3231 rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3232 (pmap == kernel_pmap) ?
3233 THREAD_UNINT : THREAD_ABORTSAFE,
3234 pmap, pmap_addr + (va - entry->vme_start));
3235 }
3236
3237 if (rc != KERN_SUCCESS) {
3238 struct vm_map_entry tmp_entry = *entry;
3239
3240 /* unwire wired pages */
3241 tmp_entry.vme_end = va;
3242 vm_fault_unwire(map,
3243 &tmp_entry, FALSE, pmap, pmap_addr);
3244
3245 return rc;
3246 }
3247 }
3248 return KERN_SUCCESS;
3249 }
3250
3251 /*
3252 * vm_fault_unwire:
3253 *
3254 * Unwire a range of virtual addresses in a map.
3255 */
3256 void
3257 vm_fault_unwire(
3258 vm_map_t map,
3259 vm_map_entry_t entry,
3260 boolean_t deallocate,
3261 pmap_t pmap,
3262 vm_map_offset_t pmap_addr)
3263 {
3264 register vm_map_offset_t va;
3265 register vm_map_offset_t end_addr = entry->vme_end;
3266 vm_object_t object;
3267
3268 object = (entry->is_sub_map)
3269 ? VM_OBJECT_NULL : entry->object.vm_object;
3270
3271 /*
3272 * Since the pages are wired down, we must be able to
3273 * get their mappings from the physical map system.
3274 */
3275
3276 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3277 pmap_change_wiring(pmap,
3278 pmap_addr + (va - entry->vme_start), FALSE);
3279
3280 if (object == VM_OBJECT_NULL) {
3281 (void) vm_fault(map, va, VM_PROT_NONE,
3282 TRUE, THREAD_UNINT, pmap, pmap_addr);
3283 } else if (object->phys_contiguous) {
3284 continue;
3285 } else {
3286 vm_prot_t prot;
3287 vm_page_t result_page;
3288 vm_page_t top_page;
3289 vm_object_t result_object;
3290 vm_fault_return_t result;
3291
3292 do {
3293 prot = VM_PROT_NONE;
3294
3295 vm_object_lock(object);
3296 vm_object_paging_begin(object);
3297 XPR(XPR_VM_FAULT,
3298 "vm_fault_unwire -> vm_fault_page\n",
3299 0,0,0,0,0);
3300 result = vm_fault_page(object,
3301 entry->offset +
3302 (va - entry->vme_start),
3303 VM_PROT_NONE, TRUE,
3304 THREAD_UNINT,
3305 entry->offset,
3306 entry->offset +
3307 (entry->vme_end
3308 - entry->vme_start),
3309 entry->behavior,
3310 &prot,
3311 &result_page,
3312 &top_page,
3313 (int *)0,
3314 0, map->no_zero_fill,
3315 FALSE, NULL, 0);
3316 } while (result == VM_FAULT_RETRY);
3317
3318 if (result != VM_FAULT_SUCCESS)
3319 panic("vm_fault_unwire: failure");
3320
3321 result_object = result_page->object;
3322 if (deallocate) {
3323 assert(!result_page->fictitious);
3324 pmap_disconnect(result_page->phys_page);
3325 VM_PAGE_FREE(result_page);
3326 } else {
3327 vm_page_lock_queues();
3328 vm_page_unwire(result_page);
3329 vm_page_unlock_queues();
3330 PAGE_WAKEUP_DONE(result_page);
3331 }
3332
3333 vm_fault_cleanup(result_object, top_page);
3334 }
3335 }
3336
3337 /*
3338 * Inform the physical mapping system that the range
3339 * of addresses may fault, so that page tables and
3340 * such may be unwired themselves.
3341 */
3342
3343 pmap_pageable(pmap, pmap_addr,
3344 pmap_addr + (end_addr - entry->vme_start), TRUE);
3345
3346 }
3347
3348 /*
3349 * vm_fault_wire_fast:
3350 *
3351 * Handle common case of a wire down page fault at the given address.
3352 * If successful, the page is inserted into the associated physical map.
3353 * The map entry is passed in to avoid the overhead of a map lookup.
3354 *
3355 * NOTE: the given address should be truncated to the
3356 * proper page address.
3357 *
3358 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
3359 * a standard error specifying why the fault is fatal is returned.
3360 *
3361 * The map in question must be referenced, and remains so.
3362 * Caller has a read lock on the map.
3363 *
3364 * This is a stripped version of vm_fault() for wiring pages. Anything
3365 * other than the common case will return KERN_FAILURE, and the caller
3366 * is expected to call vm_fault().
3367 */
3368 kern_return_t
3369 vm_fault_wire_fast(
3370 __unused vm_map_t map,
3371 vm_map_offset_t va,
3372 vm_map_entry_t entry,
3373 pmap_t pmap,
3374 vm_map_offset_t pmap_addr)
3375 {
3376 vm_object_t object;
3377 vm_object_offset_t offset;
3378 register vm_page_t m;
3379 vm_prot_t prot;
3380 thread_t thread = current_thread();
3381 unsigned int cache_attr;
3382
3383 VM_STAT(faults++);
3384
3385 if (thread != THREAD_NULL && thread->task != TASK_NULL)
3386 thread->task->faults++;
3387
3388 /*
3389 * Recovery actions
3390 */
3391
3392 #undef RELEASE_PAGE
3393 #define RELEASE_PAGE(m) { \
3394 PAGE_WAKEUP_DONE(m); \
3395 vm_page_lock_queues(); \
3396 vm_page_unwire(m); \
3397 vm_page_unlock_queues(); \
3398 }
3399
3400
3401 #undef UNLOCK_THINGS
3402 #define UNLOCK_THINGS { \
3403 vm_object_paging_end(object); \
3404 vm_object_unlock(object); \
3405 }
3406
3407 #undef UNLOCK_AND_DEALLOCATE
3408 #define UNLOCK_AND_DEALLOCATE { \
3409 UNLOCK_THINGS; \
3410 vm_object_deallocate(object); \
3411 }
3412 /*
3413 * Give up and have caller do things the hard way.
3414 */
3415
3416 #define GIVE_UP { \
3417 UNLOCK_AND_DEALLOCATE; \
3418 return(KERN_FAILURE); \
3419 }
3420
3421
3422 /*
3423 * If this entry is not directly to a vm_object, bail out.
3424 */
3425 if (entry->is_sub_map)
3426 return(KERN_FAILURE);
3427
3428 /*
3429 * Find the backing store object and offset into it.
3430 */
3431
3432 object = entry->object.vm_object;
3433 offset = (va - entry->vme_start) + entry->offset;
3434 prot = entry->protection;
3435
3436 /*
3437 * Make a reference to this object to prevent its
3438 * disposal while we are messing with it.
3439 */
3440
3441 vm_object_lock(object);
3442 assert(object->ref_count > 0);
3443 object->ref_count++;
3444 vm_object_res_reference(object);
3445 vm_object_paging_begin(object);
3446
3447 /*
3448 * INVARIANTS (through entire routine):
3449 *
3450 * 1) At all times, we must either have the object
3451 * lock or a busy page in some object to prevent
3452 * some other thread from trying to bring in
3453 * the same page.
3454 *
3455 * 2) Once we have a busy page, we must remove it from
3456 * the pageout queues, so that the pageout daemon
3457 * will not grab it away.
3458 *
3459 */
3460
3461 /*
3462 * Look for page in top-level object. If it's not there or
3463 * there's something going on, give up.
3464 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3465 * decrypt the page before wiring it down.
3466 */
3467 m = vm_page_lookup(object, offset);
3468 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3469 (m->unusual && ( m->error || m->restart || m->absent ||
3470 prot & m->page_lock))) {
3471
3472 GIVE_UP;
3473 }
3474 ASSERT_PAGE_DECRYPTED(m);
3475
3476 /*
3477 * Wire the page down now. All bail outs beyond this
3478 * point must unwire the page.
3479 */
3480
3481 vm_page_lock_queues();
3482 vm_page_wire(m);
3483 vm_page_unlock_queues();
3484
3485 /*
3486 * Mark page busy for other threads.
3487 */
3488 assert(!m->busy);
3489 m->busy = TRUE;
3490 assert(!m->absent);
3491
3492 /*
3493 * Give up if the page is being written and there's a copy object
3494 */
3495 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3496 RELEASE_PAGE(m);
3497 GIVE_UP;
3498 }
3499
3500 /*
3501 * Put this page into the physical map.
3502 * We have to unlock the object because pmap_enter
3503 * may cause other faults.
3504 */
3505 if (m->no_isync == TRUE) {
3506 pmap_sync_page_data_phys(m->phys_page);
3507
3508 m->no_isync = FALSE;
3509 }
3510
3511 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3512
3513 PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3514
3515 /*
3516 * Unlock everything, and return
3517 */
3518
3519 PAGE_WAKEUP_DONE(m);
3520 UNLOCK_AND_DEALLOCATE;
3521
3522 return(KERN_SUCCESS);
3523
3524 }
3525
3526 /*
3527 * Routine: vm_fault_copy_cleanup
3528 * Purpose:
3529 * Release a page used by vm_fault_copy.
3530 */
3531
3532 void
3533 vm_fault_copy_cleanup(
3534 vm_page_t page,
3535 vm_page_t top_page)
3536 {
3537 vm_object_t object = page->object;
3538
3539 vm_object_lock(object);
3540 PAGE_WAKEUP_DONE(page);
3541 vm_page_lock_queues();
3542 if (!page->active && !page->inactive)
3543 vm_page_activate(page);
3544 vm_page_unlock_queues();
3545 vm_fault_cleanup(object, top_page);
3546 }
3547
3548 void
3549 vm_fault_copy_dst_cleanup(
3550 vm_page_t page)
3551 {
3552 vm_object_t object;
3553
3554 if (page != VM_PAGE_NULL) {
3555 object = page->object;
3556 vm_object_lock(object);
3557 vm_page_lock_queues();
3558 vm_page_unwire(page);
3559 vm_page_unlock_queues();
3560 vm_object_paging_end(object);
3561 vm_object_unlock(object);
3562 }
3563 }
3564
3565 /*
3566 * Routine: vm_fault_copy
3567 *
3568 * Purpose:
3569 * Copy pages from one virtual memory object to another --
3570 * neither the source nor destination pages need be resident.
3571 *
3572 * Before actually copying a page, the version associated with
3573 * the destination address map will be verified.
3574 *
3575 * In/out conditions:
3576 * The caller must hold a reference, but not a lock, to
3577 * each of the source and destination objects and to the
3578 * destination map.
3579 *
3580 * Results:
3581 * Returns KERN_SUCCESS if no errors were encountered in
3582 * reading or writing the data. Returns KERN_INTERRUPTED if
3583 * the operation was interrupted (only possible if the
3584 * "interruptible" argument is asserted). Other return values
3585 * indicate a permanent error in copying the data.
3586 *
3587 * The actual amount of data copied will be returned in the
3588 * "copy_size" argument. In the event that the destination map
3589 * verification failed, this amount may be less than the amount
3590 * requested.
3591 */
3592 kern_return_t
3593 vm_fault_copy(
3594 vm_object_t src_object,
3595 vm_object_offset_t src_offset,
3596 vm_map_size_t *copy_size, /* INOUT */
3597 vm_object_t dst_object,
3598 vm_object_offset_t dst_offset,
3599 vm_map_t dst_map,
3600 vm_map_version_t *dst_version,
3601 int interruptible)
3602 {
3603 vm_page_t result_page;
3604
3605 vm_page_t src_page;
3606 vm_page_t src_top_page;
3607 vm_prot_t src_prot;
3608
3609 vm_page_t dst_page;
3610 vm_page_t dst_top_page;
3611 vm_prot_t dst_prot;
3612
3613 vm_map_size_t amount_left;
3614 vm_object_t old_copy_object;
3615 kern_return_t error = 0;
3616
3617 vm_map_size_t part_size;
3618
3619 /*
3620 * In order not to confuse the clustered pageins, align
3621 * the different offsets on a page boundary.
3622 */
3623 vm_object_offset_t src_lo_offset = vm_object_trunc_page(src_offset);
3624 vm_object_offset_t dst_lo_offset = vm_object_trunc_page(dst_offset);
3625 vm_object_offset_t src_hi_offset = vm_object_round_page(src_offset + *copy_size);
3626 vm_object_offset_t dst_hi_offset = vm_object_round_page(dst_offset + *copy_size);
3627
3628 #define RETURN(x) \
3629 MACRO_BEGIN \
3630 *copy_size -= amount_left; \
3631 MACRO_RETURN(x); \
3632 MACRO_END
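 /*
 * RETURN() folds the bytes still uncopied (amount_left) into
 * *copy_size, so the caller learns how much was actually
 * copied even on an early exit.
 */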
3633
3634 amount_left = *copy_size;
3635 do { /* while (amount_left > 0) */
3636 /*
3637 * There may be a deadlock if both source and destination
3638 * pages are the same. To avoid this deadlock, the copy must
3639 * start by getting the destination page in order to apply
3640 * COW semantics if any.
3641 */
3642
3643 RetryDestinationFault: ;
3644
3645 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3646
3647 vm_object_lock(dst_object);
3648 vm_object_paging_begin(dst_object);
3649
3650 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3651 switch (vm_fault_page(dst_object,
3652 vm_object_trunc_page(dst_offset),
3653 VM_PROT_WRITE|VM_PROT_READ,
3654 FALSE,
3655 interruptible,
3656 dst_lo_offset,
3657 dst_hi_offset,
3658 VM_BEHAVIOR_SEQUENTIAL,
3659 &dst_prot,
3660 &dst_page,
3661 &dst_top_page,
3662 (int *)0,
3663 &error,
3664 dst_map->no_zero_fill,
3665 FALSE, NULL, 0)) {
3666 case VM_FAULT_SUCCESS:
3667 break;
3668 case VM_FAULT_RETRY:
3669 goto RetryDestinationFault;
3670 case VM_FAULT_MEMORY_SHORTAGE:
3671 if (vm_page_wait(interruptible))
3672 goto RetryDestinationFault;
3673 /* fall thru */
3674 case VM_FAULT_INTERRUPTED:
3675 RETURN(MACH_SEND_INTERRUPTED);
3676 case VM_FAULT_FICTITIOUS_SHORTAGE:
3677 vm_page_more_fictitious();
3678 goto RetryDestinationFault;
3679 case VM_FAULT_MEMORY_ERROR:
3680 if (error)
3681 return (error);
3682 else
3683 return(KERN_MEMORY_ERROR);
3684 }
3685 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3686
3687 old_copy_object = dst_page->object->copy;
3688
3689 /*
3690 * There exists the possibility that the source and
3691 * destination pages are the same. But we can't
3692 * easily determine that now. If they are the
3693 * same, the call to vm_fault_page() for the
3694 * destination page will deadlock. To prevent this we
3695 * wire the page so we can drop busy without having
3696 * the page daemon steal the page. We clean up the
3697 * top page but keep the paging reference on the object
3698 * holding the dest page so it doesn't go away.
3699 */
3700
3701 vm_page_lock_queues();
3702 vm_page_wire(dst_page);
3703 vm_page_unlock_queues();
3704 PAGE_WAKEUP_DONE(dst_page);
3705 vm_object_unlock(dst_page->object);
3706
3707 if (dst_top_page != VM_PAGE_NULL) {
3708 vm_object_lock(dst_object);
3709 VM_PAGE_FREE(dst_top_page);
3710 vm_object_paging_end(dst_object);
3711 vm_object_unlock(dst_object);
3712 }
3713
3714 RetrySourceFault: ;
3715
3716 if (src_object == VM_OBJECT_NULL) {
3717 /*
3718 * No source object. We will just
3719 * zero-fill the page in dst_object.
3720 */
3721 src_page = VM_PAGE_NULL;
3722 result_page = VM_PAGE_NULL;
3723 } else {
3724 vm_object_lock(src_object);
3725 src_page = vm_page_lookup(src_object,
3726 vm_object_trunc_page(src_offset));
3727 if (src_page == dst_page) {
3728 src_prot = dst_prot;
3729 result_page = VM_PAGE_NULL;
3730 } else {
3731 src_prot = VM_PROT_READ;
3732 vm_object_paging_begin(src_object);
3733
3734 XPR(XPR_VM_FAULT,
3735 "vm_fault_copy(2) -> vm_fault_page\n",
3736 0,0,0,0,0);
3737 switch (vm_fault_page(src_object,
3738 vm_object_trunc_page(src_offset),
3739 VM_PROT_READ,
3740 FALSE,
3741 interruptible,
3742 src_lo_offset,
3743 src_hi_offset,
3744 VM_BEHAVIOR_SEQUENTIAL,
3745 &src_prot,
3746 &result_page,
3747 &src_top_page,
3748 (int *)0,
3749 &error,
3750 FALSE,
3751 FALSE, NULL, 0)) {
3752
3753 case VM_FAULT_SUCCESS:
3754 break;
3755 case VM_FAULT_RETRY:
3756 goto RetrySourceFault;
3757 case VM_FAULT_MEMORY_SHORTAGE:
3758 if (vm_page_wait(interruptible))
3759 goto RetrySourceFault;
3760 /* fall thru */
3761 case VM_FAULT_INTERRUPTED:
3762 vm_fault_copy_dst_cleanup(dst_page);
3763 RETURN(MACH_SEND_INTERRUPTED);
3764 case VM_FAULT_FICTITIOUS_SHORTAGE:
3765 vm_page_more_fictitious();
3766 goto RetrySourceFault;
3767 case VM_FAULT_MEMORY_ERROR:
3768 vm_fault_copy_dst_cleanup(dst_page);
3769 if (error)
3770 return (error);
3771 else
3772 return(KERN_MEMORY_ERROR);
3773 }
3774
3775
3776 assert((src_top_page == VM_PAGE_NULL) ==
3777 (result_page->object == src_object));
3778 }
3779 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3780 vm_object_unlock(result_page->object);
3781 }
3782
3783 if (!vm_map_verify(dst_map, dst_version)) {
3784 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3785 vm_fault_copy_cleanup(result_page, src_top_page);
3786 vm_fault_copy_dst_cleanup(dst_page);
3787 break;
3788 }
3789
3790 vm_object_lock(dst_page->object);
3791
3792 if (dst_page->object->copy != old_copy_object) {
3793 vm_object_unlock(dst_page->object);
3794 vm_map_verify_done(dst_map, dst_version);
3795 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3796 vm_fault_copy_cleanup(result_page, src_top_page);
3797 vm_fault_copy_dst_cleanup(dst_page);
3798 break;
3799 }
3800 vm_object_unlock(dst_page->object);
3801
3802 /*
3803 * Copy the page, and note that it is dirty
3804 * immediately.
3805 */
3806
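 /*
 * When the offsets or the residual count are not page aligned,
 * only the overlap of the two partial pages can be copied:
 * part_size = PAGE_SIZE - max(src_po, dst_po), further clamped
 * to amount_left. E.g. with a 4K page, src_po 0x200 and
 * dst_po 0x600 give part_size 0xA00.
 */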
3807 if (!page_aligned(src_offset) ||
3808 !page_aligned(dst_offset) ||
3809 !page_aligned(amount_left)) {
3810
3811 vm_object_offset_t src_po,
3812 dst_po;
3813
3814 src_po = src_offset - vm_object_trunc_page(src_offset);
3815 dst_po = dst_offset - vm_object_trunc_page(dst_offset);
3816
3817 if (dst_po > src_po) {
3818 part_size = PAGE_SIZE - dst_po;
3819 } else {
3820 part_size = PAGE_SIZE - src_po;
3821 }
3822 if (part_size > (amount_left)){
3823 part_size = amount_left;
3824 }
3825
3826 if (result_page == VM_PAGE_NULL) {
3827 vm_page_part_zero_fill(dst_page,
3828 dst_po, part_size);
3829 } else {
3830 vm_page_part_copy(result_page, src_po,
3831 dst_page, dst_po, part_size);
3832 if(!dst_page->dirty){
3833 vm_object_lock(dst_object);
3834 dst_page->dirty = TRUE;
3835 vm_object_unlock(dst_page->object);
3836 }
3837
3838 }
3839 } else {
3840 part_size = PAGE_SIZE;
3841
3842 if (result_page == VM_PAGE_NULL)
3843 vm_page_zero_fill(dst_page);
3844 else{
3845 vm_page_copy(result_page, dst_page);
3846 if(!dst_page->dirty){
3847 vm_object_lock(dst_object);
3848 dst_page->dirty = TRUE;
3849 vm_object_unlock(dst_page->object);
3850 }
3851 }
3852
3853 }
3854
3855 /*
3856 * Unlock everything, and return
3857 */
3858
3859 vm_map_verify_done(dst_map, dst_version);
3860
3861 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3862 vm_fault_copy_cleanup(result_page, src_top_page);
3863 vm_fault_copy_dst_cleanup(dst_page);
3864
3865 amount_left -= part_size;
3866 src_offset += part_size;
3867 dst_offset += part_size;
3868 } while (amount_left > 0);
3869
3870 RETURN(KERN_SUCCESS);
3871 #undef RETURN
3872
3873 /*NOTREACHED*/
3874 }
3875
3876 #ifdef notdef
3877
3878 /*
3879 * Routine: vm_fault_page_overwrite
3880 *
3881 * Description:
3882 * A form of vm_fault_page that assumes that the
3883 * resulting page will be overwritten in its entirety,
3884 * making it unnecessary to obtain the correct *contents*
3885 * of the page.
3886 *
3887 * Implementation:
3888 * XXX Untested. Also unused. Eventually, this technology
3889 * could be used in vm_fault_copy() to advantage.
3890 */
3891 vm_fault_return_t
3892 vm_fault_page_overwrite(
3893 register
3894 vm_object_t dst_object,
3895 vm_object_offset_t dst_offset,
3896 vm_page_t *result_page) /* OUT */
3897 {
3898 register
3899 vm_page_t dst_page;
3900 kern_return_t wait_result;
3901
3902 #define interruptible THREAD_UNINT /* XXX */
3903
3904 while (TRUE) {
3905 /*
3906 * Look for a page at this offset
3907 */
3908
3909 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3910 == VM_PAGE_NULL) {
3911 /*
3912 * No page, no problem... just allocate one.
3913 */
3914
3915 dst_page = vm_page_alloc(dst_object, dst_offset);
3916 if (dst_page == VM_PAGE_NULL) {
3917 vm_object_unlock(dst_object);
3918 VM_PAGE_WAIT();
3919 vm_object_lock(dst_object);
3920 continue;
3921 }
3922
3923 /*
3924 * Pretend that the memory manager
3925 * write-protected the page.
3926 *
3927 * Note that we will be asking for write
3928 * permission without asking for the data
3929 * first.
3930 */
3931
3932 dst_page->overwriting = TRUE;
3933 dst_page->page_lock = VM_PROT_WRITE;
3934 dst_page->absent = TRUE;
3935 dst_page->unusual = TRUE;
3936 dst_object->absent_count++;
3937
3938 break;
3939
3940 /*
3941 * When we bail out, we might have to throw
3942 * away the page created here.
3943 */
3944
3945 #define DISCARD_PAGE \
3946 MACRO_BEGIN \
3947 vm_object_lock(dst_object); \
3948 dst_page = vm_page_lookup(dst_object, dst_offset); \
3949 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3950 VM_PAGE_FREE(dst_page); \
3951 vm_object_unlock(dst_object); \
3952 MACRO_END
3953 }
3954
3955 /*
3956 * If the page is write-protected...
3957 */
3958
3959 if (dst_page->page_lock & VM_PROT_WRITE) {
3960 /*
3961 * ... and an unlock request hasn't been sent
3962 */
3963
3964 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3965 vm_prot_t u;
3966 kern_return_t rc;
3967
3968 /*
3969 * ... then send one now.
3970 */
3971
3972 if (!dst_object->pager_ready) {
3973 wait_result = vm_object_assert_wait(dst_object,
3974 VM_OBJECT_EVENT_PAGER_READY,
3975 interruptible);
3976 vm_object_unlock(dst_object);
3977 if (wait_result == THREAD_WAITING)
3978 wait_result = thread_block(THREAD_CONTINUE_NULL);
3979 if (wait_result != THREAD_AWAKENED) {
3980 DISCARD_PAGE;
3981 return(VM_FAULT_INTERRUPTED);
3982 }
3983 continue;
3984 }
3985
3986 u = dst_page->unlock_request |= VM_PROT_WRITE;
3987 vm_object_unlock(dst_object);
3988
3989 if ((rc = memory_object_data_unlock(
3990 dst_object->pager,
3991 dst_offset + dst_object->paging_offset,
3992 PAGE_SIZE,
3993 u)) != KERN_SUCCESS) {
3994 if (vm_fault_debug)
3995 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3996 DISCARD_PAGE;
3997 return((rc == MACH_SEND_INTERRUPTED) ?
3998 VM_FAULT_INTERRUPTED :
3999 VM_FAULT_MEMORY_ERROR);
4000 }
4001 vm_object_lock(dst_object);
4002 continue;
4003 }
4004
4005 /* ... fall through to wait below */
4006 } else {
4007 /*
4008 * If the page isn't being used for other
4009 * purposes, then we're done.
4010 */
4011 if ( ! (dst_page->busy || dst_page->absent ||
4012 dst_page->error || dst_page->restart) )
4013 break;
4014 }
4015
4016 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
4017 vm_object_unlock(dst_object);
4018 if (wait_result == THREAD_WAITING)
4019 wait_result = thread_block(THREAD_CONTINUE_NULL);
4020 if (wait_result != THREAD_AWAKENED) {
4021 DISCARD_PAGE;
4022 return(VM_FAULT_INTERRUPTED);
4023 }
4024 }
4025
4026 *result_page = dst_page;
4027 return(VM_FAULT_SUCCESS);
4028
4029 #undef interruptible
4030 #undef DISCARD_PAGE
4031 }
4032
4033 #endif /* notdef */
4034
4035 #if VM_FAULT_CLASSIFY
4036 /*
4037 * Temporary statistics gathering support.
4038 */
4039
4040 /*
4041 * Statistics arrays:
4042 */
4043 #define VM_FAULT_TYPES_MAX 5
4044 #define VM_FAULT_LEVEL_MAX 8
4045
4046 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4047
4048 #define VM_FAULT_TYPE_ZERO_FILL 0
4049 #define VM_FAULT_TYPE_MAP_IN 1
4050 #define VM_FAULT_TYPE_PAGER 2
4051 #define VM_FAULT_TYPE_COPY 3
4052 #define VM_FAULT_TYPE_OTHER 4
4053
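/*
 * vm_fault_stats[type][level]: "level" is the depth in the
 * shadow chain at which the fault was resolved, clamped to
 * VM_FAULT_LEVEL_MAX.
 */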
4054
4055 void
4056 vm_fault_classify(vm_object_t object,
4057 vm_object_offset_t offset,
4058 vm_prot_t fault_type)
4059 {
4060 int type, level = 0;
4061 vm_page_t m;
4062
4063 while (TRUE) {
4064 m = vm_page_lookup(object, offset);
4065 if (m != VM_PAGE_NULL) {
4066 if (m->busy || m->error || m->restart || m->absent ||
4067 fault_type & m->page_lock) {
4068 type = VM_FAULT_TYPE_OTHER;
4069 break;
4070 }
4071 if (((fault_type & VM_PROT_WRITE) == 0) ||
4072 ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4073 type = VM_FAULT_TYPE_MAP_IN;
4074 break;
4075 }
4076 type = VM_FAULT_TYPE_COPY;
4077 break;
4078 }
4079 else {
4080 if (object->pager_created) {
4081 type = VM_FAULT_TYPE_PAGER;
4082 break;
4083 }
4084 if (object->shadow == VM_OBJECT_NULL) {
4085 type = VM_FAULT_TYPE_ZERO_FILL;
4086 break;
4087 }
4088
4089 offset += object->shadow_offset;
4090 object = object->shadow;
4091 level++;
4092 continue;
4093 }
4094 }
4095
4096 if (level > VM_FAULT_LEVEL_MAX)
4097 level = VM_FAULT_LEVEL_MAX;
4098
4099 vm_fault_stats[type][level] += 1;
4100
4101 return;
4102 }
4103
4104 /* cleanup routine to call from debugger */
4105
4106 void
4107 vm_fault_classify_init(void)
4108 {
4109 int type, level;
4110
4111 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4112 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4113 vm_fault_stats[type][level] = 0;
4114 }
4115 }
4116
4117 return;
4118 }
4119 #endif /* VM_FAULT_CLASSIFY */