osfmk/vm/vm_fault.c

   1 /*
   2  * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*
  26  * @OSF_COPYRIGHT@
  27  */
  28 /*
  29  * Mach Operating System
  30  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  31  * All Rights Reserved.
  32  *
  33  * Permission to use, copy, modify and distribute this software and its
  34  * documentation is hereby granted, provided that both the copyright
  35  * notice and this permission notice appear in all copies of the
  36  * software, derivative works or modified versions, and any portions
  37  * thereof, and that both notices appear in supporting documentation.
  38  *
  39  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  40  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  41  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  42  *
  43  * Carnegie Mellon requests users of this software to return to
  44  *
  45  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  46  *  School of Computer Science
  47  *  Carnegie Mellon University
  48  *  Pittsburgh PA 15213-3890
  49  *
  50  * any improvements or extensions that they make and grant Carnegie Mellon
  51  * the rights to redistribute these changes.
  52  */
  53 /*
  54  */
  55 /*
  56  *      File:   vm_fault.c
  57  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  58  *
  59  *      Page fault handling module.
  60  */
  61 #ifdef MACH_BSD
  62 /* remove after component interface available */
  63 extern int      vnode_pager_workaround;
  64 extern int      device_pager_workaround;
  65 #endif
  66
  67 #include <mach_cluster_stats.h>
  68 #include <mach_pagemap.h>
  69 #include <mach_kdb.h>
  70
  71 #include <vm/vm_fault.h>
  72 #include <mach/kern_return.h>
  73 #include <mach/message.h>       /* for error codes */
  74 #include <kern/host_statistics.h>
  75 #include <kern/counters.h>
  76 #include <kern/task.h>
  77 #include <kern/thread.h>
  78 #include <kern/sched_prim.h>
  79 #include <kern/host.h>
  80 #include <kern/xpr.h>
  81 #include <ppc/proc_reg.h>
  82 #include <vm/task_working_set.h>
  83 #include <vm/vm_map.h>
  84 #include <vm/vm_object.h>
  85 #include <vm/vm_page.h>
  86 #include <vm/vm_kern.h>
  87 #include <vm/pmap.h>
  88 #include <vm/vm_pageout.h>
  89 #include <mach/vm_param.h>
  90 #include <mach/vm_behavior.h>
  91 #include <mach/memory_object.h>
  92                                 /* For memory_object_data_{request,unlock} */
  93 #include <kern/mach_param.h>
  94 #include <kern/macro_help.h>
  95 #include <kern/zalloc.h>
  96 #include <kern/misc_protos.h>
  97
  98 #include <sys/kdebug.h>
  99
 100 #define VM_FAULT_CLASSIFY       0
 101 #define VM_FAULT_STATIC_CONFIG  1
 102
 103 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
 104
 105 int             vm_object_absent_max = 50;
 106
 107 int             vm_fault_debug = 0;
 108
 109 #if     !VM_FAULT_STATIC_CONFIG
 110 boolean_t       vm_fault_dirty_handling = FALSE;
 111 boolean_t       vm_fault_interruptible = FALSE;
 112 boolean_t       software_reference_bits = TRUE;
 113 #endif
 114
 115 #if     MACH_KDB
 116 extern struct db_watchpoint *db_watchpoint_list;
 117 #endif  /* MACH_KDB */
 118
 119 /* Forward declarations of internal routines. */
 120 extern kern_return_t vm_fault_wire_fast(
 121                                 vm_map_t        map,
 122                                 vm_offset_t     va,
 123                                 vm_map_entry_t  entry,
 124                                 pmap_t          pmap,
 125                                 vm_offset_t     pmap_addr);
 126
 127 extern void vm_fault_continue(void);
 128
 129 extern void vm_fault_copy_cleanup(
 130                                 vm_page_t       page,
 131                                 vm_page_t       top_page);
 132
 133 extern void vm_fault_copy_dst_cleanup(
 134                                 vm_page_t       page);
 135
 136 #if     VM_FAULT_CLASSIFY
 137 extern void vm_fault_classify(vm_object_t       object,
 138                           vm_object_offset_t    offset,
 139                           vm_prot_t             fault_type);
 140
 141 extern void vm_fault_classify_init(void);
 142 #endif
 143
 144 /*
 145  *      Routine:        vm_fault_init
 146  *      Purpose:
 147  *              Initialize our private data structures.
 148  */
 149 void
 150 vm_fault_init(void)
 151 {
 152 }
 153
 154 /*
 155  *      Routine:        vm_fault_cleanup
 156  *      Purpose:
 157  *              Clean up the result of vm_fault_page.
 158  *      Results:
 159  *              The paging reference for "object" is released.
 160  *              "object" is unlocked.
 161  *              If "top_page" is not null,  "top_page" is
 162  *              freed and the paging reference for the object
 163  *              containing it is released.
 164  *
 165  *      In/out conditions:
 166  *              "object" must be locked.
 167  */
 168 void
 169 vm_fault_cleanup(
 170         register vm_object_t    object,
 171         register vm_page_t      top_page)
 172 {
 173         vm_object_paging_end(object);
 174         vm_object_unlock(object);
 175
 176         if (top_page != VM_PAGE_NULL) {
 177             object = top_page->object;
 178             vm_object_lock(object);
 179             VM_PAGE_FREE(top_page);
 180             vm_object_paging_end(object);
 181             vm_object_unlock(object);
 182         }
 183 }
 184
 185 #if     MACH_CLUSTER_STATS
 186 #define MAXCLUSTERPAGES 16
 187 struct {
 188         unsigned long pages_in_cluster;
 189         unsigned long pages_at_higher_offsets;
 190         unsigned long pages_at_lower_offsets;
 191 } cluster_stats_in[MAXCLUSTERPAGES];
 192 #define CLUSTER_STAT(clause)    clause
 193 #define CLUSTER_STAT_HIGHER(x)  \
 194         ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
 195 #define CLUSTER_STAT_LOWER(x)   \
 196          ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
 197 #define CLUSTER_STAT_CLUSTER(x) \
 198         ((cluster_stats_in[(x)].pages_in_cluster)++)
 199 #else   /* MACH_CLUSTER_STATS */
 200 #define CLUSTER_STAT(clause)
 201 #endif  /* MACH_CLUSTER_STATS */
 202
 203 /* XXX - temporary */
 204 boolean_t vm_allow_clustered_pagein = FALSE;
 205 int vm_pagein_cluster_used = 0;
 206
 207 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
 208
 209
 210 boolean_t       vm_page_deactivate_behind = TRUE;
 211 /*
 212  * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
 213  */
 214 int vm_default_ahead = 0;
 215 int vm_default_behind = MAX_UPL_TRANSFER;
 216
 217 /*
 218  *      vm_page_deactivate_behind
 219  *
 220  *      Determine if sequential access is in progress
 221  *      in accordance with the behavior specified.  If
 222  *      so, compute a potential page to deactive and
 223  *      deactivate it.
 224  *
 225  *      The object must be locked.
 226  */
 227 static
 228 boolean_t
 229 vm_fault_deactivate_behind(
 230         vm_object_t object,
 231         vm_offset_t offset,
 232         vm_behavior_t behavior)
 233 {
 234         vm_page_t m;
 235
 236 #if TRACEFAULTPAGE
 237         dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
 238 #endif
 239
 240         switch (behavior) {
 241         case VM_BEHAVIOR_RANDOM:
 242                 object->sequential = PAGE_SIZE_64;
 243                 m = VM_PAGE_NULL;
 244                 break;
 245         case VM_BEHAVIOR_SEQUENTIAL:
 246                 if (offset &&
 247                         object->last_alloc == offset - PAGE_SIZE_64) {
 248                         object->sequential += PAGE_SIZE_64;
 249                         m = vm_page_lookup(object, offset - PAGE_SIZE_64);
 250                 } else {
 251                         object->sequential = PAGE_SIZE_64; /* reset */
 252                         m = VM_PAGE_NULL;
 253                 }
 254                 break;
 255         case VM_BEHAVIOR_RSEQNTL:
 256                 if (object->last_alloc &&
 257                         object->last_alloc == offset + PAGE_SIZE_64) {
 258                         object->sequential += PAGE_SIZE_64;
 259                         m = vm_page_lookup(object, offset + PAGE_SIZE_64);
 260                 } else {
 261                         object->sequential = PAGE_SIZE_64; /* reset */
 262                         m = VM_PAGE_NULL;
 263                 }
 264                 break;
 265         case VM_BEHAVIOR_DEFAULT:
 266         default:
 267                 if (offset &&
 268                         object->last_alloc == offset - PAGE_SIZE_64) {
 269                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
 270
 271                         object->sequential += PAGE_SIZE_64;
 272                         m = (offset >= behind &&
 273                                 object->sequential >= behind) ?
 274                                 vm_page_lookup(object, offset - behind) :
 275                                 VM_PAGE_NULL;
 276                 } else if (object->last_alloc &&
 277                         object->last_alloc == offset + PAGE_SIZE_64) {
 278                         vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
 279
 280                         object->sequential += PAGE_SIZE_64;
 281                         m = (offset < -behind &&
 282                                 object->sequential >= behind) ?
 283                                 vm_page_lookup(object, offset + behind) :
 284                                 VM_PAGE_NULL;
 285                 } else {
 286                         object->sequential = PAGE_SIZE_64;
 287                         m = VM_PAGE_NULL;
 288                 }
 289                 break;
 290         }
 291
 292         object->last_alloc = offset;
 293
 294         if (m) {
 295                 if (!m->busy) {
 296                         vm_page_lock_queues();
 297                         vm_page_deactivate(m);
 298                         vm_page_unlock_queues();
 299 #if TRACEFAULTPAGE
 300                         dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
 301 #endif
 302                 }
 303                 return TRUE;
 304         }
 305         return FALSE;
 306 }
 307
 308
 309 /*
 310  *      Routine:        vm_fault_page
 311  *      Purpose:
 312  *              Find the resident page for the virtual memory
 313  *              specified by the given virtual memory object
 314  *              and offset.
 315  *      Additional arguments:
 316  *              The required permissions for the page is given
 317  *              in "fault_type".  Desired permissions are included
 318  *              in "protection".  The minimum and maximum valid offsets
 319  *              within the object for the relevant map entry are
 320  *              passed in "lo_offset" and "hi_offset" respectively and
 321  *              the expected page reference pattern is passed in "behavior".
 322  *              These three parameters are used to determine pagein cluster
 323  *              limits.
 324  *
 325  *              If the desired page is known to be resident (for
 326  *              example, because it was previously wired down), asserting
 327  *              the "unwiring" parameter will speed the search.
 328  *
 329  *              If the operation can be interrupted (by thread_abort
 330  *              or thread_terminate), then the "interruptible"
 331  *              parameter should be asserted.
 332  *
 333  *      Results:
 334  *              The page containing the proper data is returned
 335  *              in "result_page".
 336  *
 337  *      In/out conditions:
 338  *              The source object must be locked and referenced,
 339  *              and must donate one paging reference.  The reference
 340  *              is not affected.  The paging reference and lock are
 341  *              consumed.
 342  *
 343  *              If the call succeeds, the object in which "result_page"
 344  *              resides is left locked and holding a paging reference.
 345  *              If this is not the original object, a busy page in the
 346  *              original object is returned in "top_page", to prevent other
 347  *              callers from pursuing this same data, along with a paging
 348  *              reference for the original object.  The "top_page" should
 349  *              be destroyed when this guarantee is no longer required.
 350  *              The "result_page" is also left busy.  It is not removed
 351  *              from the pageout queues.
 352  */
 353
 354 vm_fault_return_t
 355 vm_fault_page(
 356         /* Arguments: */
 357         vm_object_t     first_object,   /* Object to begin search */
 358         vm_object_offset_t first_offset,        /* Offset into object */
 359         vm_prot_t       fault_type,     /* What access is requested */
 360         boolean_t       must_be_resident,/* Must page be resident? */
 361         int             interruptible,  /* how may fault be interrupted? */
 362         vm_object_offset_t lo_offset,   /* Map entry start */
 363         vm_object_offset_t hi_offset,   /* Map entry end */
 364         vm_behavior_t   behavior,       /* Page reference behavior */
 365         /* Modifies in place: */
 366         vm_prot_t       *protection,    /* Protection for mapping */
 367         /* Returns: */
 368         vm_page_t       *result_page,   /* Page found, if successful */
 369         vm_page_t       *top_page,      /* Page in top object, if
 370                                          * not result_page.  */
 371         int             *type_of_fault, /* if non-null, fill in with type of fault
 372                                          * COW, zero-fill, etc... returned in trace point */
 373         /* More arguments: */
 374         kern_return_t   *error_code,    /* code if page is in error */
 375         boolean_t       no_zero_fill,   /* don't zero fill absent pages */
 376         boolean_t       data_supply,    /* treat as data_supply if
 377                                          * it is a write fault and a full
 378                                          * page is provided */
 379         vm_map_t        map,
 380         vm_offset_t     vaddr)
 381 {
 382         register
 383         vm_page_t               m;
 384         register
 385         vm_object_t             object;
 386         register
 387         vm_object_offset_t      offset;
 388         vm_page_t               first_m;
 389         vm_object_t             next_object;
 390         vm_object_t             copy_object;
 391         boolean_t               look_for_page;
 392         vm_prot_t               access_required = fault_type;
 393         vm_prot_t               wants_copy_flag;
 394         vm_size_t               cluster_size, length;
 395         vm_object_offset_t      cluster_offset;
 396         vm_object_offset_t      cluster_start, cluster_end, paging_offset;
 397         vm_object_offset_t      align_offset;
 398         CLUSTER_STAT(int pages_at_higher_offsets;)
 399         CLUSTER_STAT(int pages_at_lower_offsets;)
 400         kern_return_t   wait_result;
 401         boolean_t               interruptible_state;
 402         boolean_t               bumped_pagein = FALSE;
 403
 404
 405 #if     MACH_PAGEMAP
 406 /*
 407  * MACH page map - an optional optimization where a bit map is maintained
 408  * by the VM subsystem for internal objects to indicate which pages of
 409  * the object currently reside on backing store.  This existence map
 410  * duplicates information maintained by the vnode pager.  It is
 411  * created at the time of the first pageout against the object, i.e.
 412  * at the same time pager for the object is created.  The optimization
 413  * is designed to eliminate pager interaction overhead, if it is
 414  * 'known' that the page does not exist on backing store.
 415  *
 416  * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 417  * either marked as paged out in the existence map for the object or no
 418  * existence map exists for the object.  LOOK_FOR() is one of the
 419  * criteria in the decision to invoke the pager.   It is also used as one
 420  * of the criteria to terminate the scan for adjacent pages in a clustered
 421  * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 422  * permanent objects.  Note also that if the pager for an internal object
 423  * has not been created, the pager is not invoked regardless of the value
 424  * of LOOK_FOR() and that clustered pagein scans are only done on an object
 425  * for which a pager has been created.
 426  *
 427  * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 428  * is marked as paged out in the existence map for the object.  PAGED_OUT()
 429  * PAGED_OUT() is used to determine if a page has already been pushed
 430  * into a copy object in order to avoid a redundant page out operation.
 431  */
 432 #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 433                         != VM_EXTERNAL_STATE_ABSENT)
 434 #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
 435                         == VM_EXTERNAL_STATE_EXISTS)
 436 #else /* MACH_PAGEMAP */
 437 /*
 438  * If the MACH page map optimization is not enabled,
 439  * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 440  * invoked to resolve missing pages in an object, assuming the pager
 441  * has been created for the object.  In a clustered page operation, the
 442  * absence of a page on backing backing store cannot be used to terminate
 443  * a scan for adjacent pages since that information is available only in
 444  * the pager.  Hence pages that may not be paged out are potentially
 445  * included in a clustered request.  The vnode pager is coded to deal
 446  * with any combination of absent/present pages in a clustered
 447  * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 448  * will always be invoked to push a dirty page into a copy object assuming
 449  * a pager has been created.  If the page has already been pushed, the
 450  * pager will ingore the new request.
 451  */
 452 #define LOOK_FOR(o, f) TRUE
 453 #define PAGED_OUT(o, f) FALSE
 454 #endif /* MACH_PAGEMAP */
 455
 456 /*
 457  *      Recovery actions
 458  */
 459 #define PREPARE_RELEASE_PAGE(m)                         \
 460         MACRO_BEGIN                                     \
 461         vm_page_lock_queues();                          \
 462         MACRO_END
 463
 464 #define DO_RELEASE_PAGE(m)                              \
 465         MACRO_BEGIN                                     \
 466         PAGE_WAKEUP_DONE(m);                            \
 467         if (!m->active && !m->inactive)                 \
 468                 vm_page_activate(m);                    \
 469         vm_page_unlock_queues();                        \
 470         MACRO_END
 471
 472 #define RELEASE_PAGE(m)                                 \
 473         MACRO_BEGIN                                     \
 474         PREPARE_RELEASE_PAGE(m);                        \
 475         DO_RELEASE_PAGE(m);                             \
 476         MACRO_END
 477
 478 #if TRACEFAULTPAGE
 479         dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
 480 #endif
 481
 482
 483
 484 #if     !VM_FAULT_STATIC_CONFIG
 485         if (vm_fault_dirty_handling
 486 #if     MACH_KDB
 487                 /*
 488                  *      If there are watchpoints set, then
 489                  *      we don't want to give away write permission
 490                  *      on a read fault.  Make the task write fault,
 491                  *      so that the watchpoint code notices the access.
 492                  */
 493             || db_watchpoint_list
 494 #endif  /* MACH_KDB */
 495             ) {
 496                 /*
 497                  *      If we aren't asking for write permission,
 498                  *      then don't give it away.  We're using write
 499                  *      faults to set the dirty bit.
 500                  */
 501                 if (!(fault_type & VM_PROT_WRITE))
 502                         *protection &= ~VM_PROT_WRITE;
 503         }
 504
 505         if (!vm_fault_interruptible)
 506                 interruptible = THREAD_UNINT;
 507 #else   /* STATIC_CONFIG */
 508 #if     MACH_KDB
 509                 /*
 510                  *      If there are watchpoints set, then
 511                  *      we don't want to give away write permission
 512                  *      on a read fault.  Make the task write fault,
 513                  *      so that the watchpoint code notices the access.
 514                  */
 515             if (db_watchpoint_list) {
 516                 /*
 517                  *      If we aren't asking for write permission,
 518                  *      then don't give it away.  We're using write
 519                  *      faults to set the dirty bit.
 520                  */
 521                 if (!(fault_type & VM_PROT_WRITE))
 522                         *protection &= ~VM_PROT_WRITE;
 523         }
 524
 525 #endif  /* MACH_KDB */
 526 #endif  /* STATIC_CONFIG */
 527
 528         interruptible_state = thread_interrupt_level(interruptible);
 529
 530         /*
 531          *      INVARIANTS (through entire routine):
 532          *
 533          *      1)      At all times, we must either have the object
 534          *              lock or a busy page in some object to prevent
 535          *              some other thread from trying to bring in
 536          *              the same page.
 537          *
 538          *              Note that we cannot hold any locks during the
 539          *              pager access or when waiting for memory, so
 540          *              we use a busy page then.
 541          *
 542          *              Note also that we aren't as concerned about more than
 543          *              one thread attempting to memory_object_data_unlock
 544          *              the same page at once, so we don't hold the page
 545          *              as busy then, but do record the highest unlock
 546          *              value so far.  [Unlock requests may also be delivered
 547          *              out of order.]
 548          *
 549          *      2)      To prevent another thread from racing us down the
 550          *              shadow chain and entering a new page in the top
 551          *              object before we do, we must keep a busy page in
 552          *              the top object while following the shadow chain.
 553          *
 554          *      3)      We must increment paging_in_progress on any object
 555          *              for which we have a busy page
 556          *
 557          *      4)      We leave busy pages on the pageout queues.
 558          *              If the pageout daemon comes across a busy page,
 559          *              it will remove the page from the pageout queues.
 560          */
 561
 562         /*
 563          *      Search for the page at object/offset.
 564          */
 565
 566         object = first_object;
 567         offset = first_offset;
 568         first_m = VM_PAGE_NULL;
 569         access_required = fault_type;
 570
 571         XPR(XPR_VM_FAULT,
 572                 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
 573                 (integer_t)object, offset, fault_type, *protection, 0);
 574
 575         /*
 576          *      See whether this page is resident
 577          */
 578
 579         while (TRUE) {
 580 #if TRACEFAULTPAGE
 581                 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
 582 #endif
 583                 if (!object->alive) {
 584                         vm_fault_cleanup(object, first_m);
 585                         thread_interrupt_level(interruptible_state);
 586                         return(VM_FAULT_MEMORY_ERROR);
 587                 }
 588                 m = vm_page_lookup(object, offset);
 589 #if TRACEFAULTPAGE
 590                 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
 591 #endif
 592                 if (m != VM_PAGE_NULL) {
 593                         /*
 594                          *      If the page was pre-paged as part of a
 595                          *      cluster, record the fact.
 596                          */
 597                         if (m->clustered) {
 598                                 vm_pagein_cluster_used++;
 599                                 m->clustered = FALSE;
 600                         }
 601
 602                         /*
 603                          *      If the page is being brought in,
 604                          *      wait for it and then retry.
 605                          *
 606                          *      A possible optimization: if the page
 607                          *      is known to be resident, we can ignore
 608                          *      pages that are absent (regardless of
 609                          *      whether they're busy).
 610                          */
 611
 612                         if (m->busy) {
 613 #if TRACEFAULTPAGE
 614                                 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 615 #endif
 616                                 wait_result = PAGE_SLEEP(object, m, interruptible);
 617                                 XPR(XPR_VM_FAULT,
 618                                     "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
 619                                         (integer_t)object, offset,
 620                                         (integer_t)m, 0, 0);
 621                                 counter(c_vm_fault_page_block_busy_kernel++);
 622
 623                                 if (wait_result != THREAD_AWAKENED) {
 624                                         vm_fault_cleanup(object, first_m);
 625                                         thread_interrupt_level(interruptible_state);
 626                                         if (wait_result == THREAD_RESTART)
 627                                           {
 628                                                 return(VM_FAULT_RETRY);
 629                                           }
 630                                         else
 631                                           {
 632                                                 return(VM_FAULT_INTERRUPTED);
 633                                           }
 634                                 }
 635                                 continue;
 636                         }
 637
 638                         /*
 639                          *      If the page is in error, give up now.
 640                          */
 641
 642                         if (m->error) {
 643 #if TRACEFAULTPAGE
 644                                 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);      /* (TEST/DEBUG) */
 645 #endif
 646                                 if (error_code)
 647                                         *error_code = m->page_error;
 648                                 VM_PAGE_FREE(m);
 649                                 vm_fault_cleanup(object, first_m);
 650                                 thread_interrupt_level(interruptible_state);
 651                                 return(VM_FAULT_MEMORY_ERROR);
 652                         }
 653
 654                         /*
 655                          *      If the pager wants us to restart
 656                          *      at the top of the chain,
 657                          *      typically because it has moved the
 658                          *      page to another pager, then do so.
 659                          */
 660
 661                         if (m->restart) {
 662 #if TRACEFAULTPAGE
 663                                 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 664 #endif
 665                                 VM_PAGE_FREE(m);
 666                                 vm_fault_cleanup(object, first_m);
 667                                 thread_interrupt_level(interruptible_state);
 668                                 return(VM_FAULT_RETRY);
 669                         }
 670
 671                         /*
 672                          *      If the page isn't busy, but is absent,
 673                          *      then it was deemed "unavailable".
 674                          */
 675
 676                         if (m->absent) {
 677                                 /*
 678                                  * Remove the non-existent page (unless it's
 679                                  * in the top object) and move on down to the
 680                                  * next object (if there is one).
 681                                  */
 682 #if TRACEFAULTPAGE
 683                                 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);  /* (TEST/DEBUG) */
 684 #endif
 685
 686                                 next_object = object->shadow;
 687                                 if (next_object == VM_OBJECT_NULL) {
 688                                         vm_page_t real_m;
 689
 690                                         assert(!must_be_resident);
 691
 692                                         if (object->shadow_severed) {
 693                                                 vm_fault_cleanup(
 694                                                         object, first_m);
 695                                                 thread_interrupt_level(interruptible_state);
 696                                                 return VM_FAULT_MEMORY_ERROR;
 697                                         }
 698
 699                                         /*
 700                                          * Absent page at bottom of shadow
 701                                          * chain; zero fill the page we left
 702                                          * busy in the first object, and flush
 703                                          * the absent page.  But first we
 704                                          * need to allocate a real page.
 705                                          */
 706                                         if (VM_PAGE_THROTTLED() ||
 707                                             (real_m = vm_page_grab())
 708                                                         == VM_PAGE_NULL) {
 709                                                 vm_fault_cleanup(
 710                                                         object, first_m);
 711                                                 thread_interrupt_level(
 712                                                         interruptible_state);
 713                                                 return(
 714                                                    VM_FAULT_MEMORY_SHORTAGE);
 715                                         }
 716
 717                                         /*
 718                                          * are we protecting the system from
 719                                          * backing store exhaustion.  If so
 720                                          * sleep unless we are privileged.
 721                                          */
 722
 723                                         if(vm_backing_store_low) {
 724                                            if(!(current_task()->priv_flags
 725                                                 & VM_BACKING_STORE_PRIV)) {
 726                                                 assert_wait((event_t)
 727                                                         &vm_backing_store_low,
 728                                                         THREAD_UNINT);
 729                                                 vm_fault_cleanup(object,
 730                                                                     first_m);
 731                                                 thread_block((void(*)(void)) 0);
 732                                                 thread_interrupt_level(
 733                                                         interruptible_state);
 734                                                 return(VM_FAULT_RETRY);
 735                                            }
 736                                         }
 737
 738
 739                                         XPR(XPR_VM_FAULT,
 740               "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
 741                                                 (integer_t)object, offset,
 742                                                 (integer_t)m,
 743                                                 (integer_t)first_object, 0);
 744                                         if (object != first_object) {
 745                                                 VM_PAGE_FREE(m);
 746                                                 vm_object_paging_end(object);
 747                                                 vm_object_unlock(object);
 748                                                 object = first_object;
 749                                                 offset = first_offset;
 750                                                 m = first_m;
 751                                                 first_m = VM_PAGE_NULL;
 752                                                 vm_object_lock(object);
 753                                         }
 754
 755                                         VM_PAGE_FREE(m);
 756                                         assert(real_m->busy);
 757                                         vm_page_insert(real_m, object, offset);
 758                                         m = real_m;
 759
 760                                         /*
 761                                          *  Drop the lock while zero filling
 762                                          *  page.  Then break because this
 763                                          *  is the page we wanted.  Checking
 764                                          *  the page lock is a waste of time;
 765                                          *  this page was either absent or
 766                                          *  newly allocated -- in both cases
 767                                          *  it can't be page locked by a pager.
 768                                          */
 769                                         m->no_isync = FALSE;
 770
 771                                         if (!no_zero_fill) {
 772                                                 vm_object_unlock(object);
 773                                                 vm_page_zero_fill(m);
 774                                                 vm_object_lock(object);
 775                                         }
 776                                         if (type_of_fault)
 777                                                 *type_of_fault = DBG_ZERO_FILL_FAULT;
 778                                         VM_STAT(zero_fill_count++);
 779
 780                                         if (bumped_pagein == TRUE) {
 781                                                 VM_STAT(pageins--);
 782                                                 current_task()->pageins--;
 783                                         }
 784 #if 0
 785                                         pmap_clear_modify(m->phys_page);
 786 #endif
 787                                         vm_page_lock_queues();
 788                                         VM_PAGE_QUEUES_REMOVE(m);
 789                                         m->page_ticket = vm_page_ticket;
 790                                         if(m->object->size > 0x80000) {
 791                                                 m->zero_fill = TRUE;
 792                                                 /* depends on the queues lock */
 793                                                 vm_zf_count += 1;
 794                                                 queue_enter(&vm_page_queue_zf,
 795                                                         m, vm_page_t, pageq);
 796                                         } else {
 797                                                 queue_enter(
 798                                                         &vm_page_queue_inactive,
 799                                                         m, vm_page_t, pageq);
 800                                         }
 801                                         vm_page_ticket_roll++;
 802                                         if(vm_page_ticket_roll ==
 803                                                 VM_PAGE_TICKETS_IN_ROLL) {
 804                                                 vm_page_ticket_roll = 0;
 805                                                 if(vm_page_ticket ==
 806                                                      VM_PAGE_TICKET_ROLL_IDS)
 807                                                         vm_page_ticket= 0;
 808                                                 else
 809                                                         vm_page_ticket++;
 810                                         }
 811                                         m->inactive = TRUE;
 812                                         vm_page_inactive_count++;
 813                                         vm_page_unlock_queues();
 814                                         break;
 815                                 } else {
 816                                         if (must_be_resident) {
 817                                                 vm_object_paging_end(object);
 818                                         } else if (object != first_object) {
 819                                                 vm_object_paging_end(object);
 820                                                 VM_PAGE_FREE(m);
 821                                         } else {
 822                                                 first_m = m;
 823                                                 m->absent = FALSE;
 824                                                 m->unusual = FALSE;
 825                                                 vm_object_absent_release(object);
 826                                                 m->busy = TRUE;
 827
 828                                                 vm_page_lock_queues();
 829                                                 VM_PAGE_QUEUES_REMOVE(m);
 830                                                 vm_page_unlock_queues();
 831                                         }
 832                                         XPR(XPR_VM_FAULT,
 833                                             "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
 834                                                 (integer_t)object, offset,
 835                                                 (integer_t)next_object,
 836                                                 offset+object->shadow_offset,0);
 837                                         offset += object->shadow_offset;
 838                                         hi_offset += object->shadow_offset;
 839                                         lo_offset += object->shadow_offset;
 840                                         access_required = VM_PROT_READ;
 841                                         vm_object_lock(next_object);
 842                                         vm_object_unlock(object);
 843                                         object = next_object;
 844                                         vm_object_paging_begin(object);
 845                                         continue;
 846                                 }
 847                         }
 848
 849                         if ((m->cleaning)
 850                                 && ((object != first_object) ||
 851                                     (object->copy != VM_OBJECT_NULL))
 852                                 && (fault_type & VM_PROT_WRITE)) {
 853                                 /*
 854                                  * This is a copy-on-write fault that will
 855                                  * cause us to revoke access to this page, but
 856                                  * this page is in the process of being cleaned
 857                                  * in a clustered pageout. We must wait until
 858                                  * the cleaning operation completes before
 859                                  * revoking access to the original page,
 860                                  * otherwise we might attempt to remove a
 861                                  * wired mapping.
 862                                  */
 863 #if TRACEFAULTPAGE
 864                                 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);  /* (TEST/DEBUG) */
 865 #endif
 866                                 XPR(XPR_VM_FAULT,
 867                                     "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
 868                                         (integer_t)object, offset,
 869                                         (integer_t)m, 0, 0);
 870                                 /* take an extra ref so that object won't die */
 871                                 assert(object->ref_count > 0);
 872                                 object->ref_count++;
 873                                 vm_object_res_reference(object);
 874                                 vm_fault_cleanup(object, first_m);
 875                                 counter(c_vm_fault_page_block_backoff_kernel++);
 876                                 vm_object_lock(object);
 877                                 assert(object->ref_count > 0);
 878                                 m = vm_page_lookup(object, offset);
 879                                 if (m != VM_PAGE_NULL && m->cleaning) {
 880                                         PAGE_ASSERT_WAIT(m, interruptible);
 881                                         vm_object_unlock(object);
 882                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
 883                                         vm_object_deallocate(object);
 884                                         goto backoff;
 885                                 } else {
 886                                         vm_object_unlock(object);
 887                                         vm_object_deallocate(object);
 888                                         thread_interrupt_level(interruptible_state);
 889                                         return VM_FAULT_RETRY;
 890                                 }
 891                         }
 892
 893                         /*
 894                          *      If the desired access to this page has
 895                          *      been locked out, request that it be unlocked.
 896                          */
 897
 898                         if (access_required & m->page_lock) {
 899                                 if ((access_required & m->unlock_request) != access_required) {
 900                                         vm_prot_t       new_unlock_request;
 901                                         kern_return_t   rc;
 902
 903 #if TRACEFAULTPAGE
 904                                         dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready);     /* (TEST/DEBUG) */
 905 #endif
 906                                         if (!object->pager_ready) {
 907                                         XPR(XPR_VM_FAULT,
 908                                             "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
 909                                                 access_required,
 910                                                 (integer_t)object, offset,
 911                                                 (integer_t)m, 0);
 912                                                 /* take an extra ref */
 913                                                 assert(object->ref_count > 0);
 914                                                 object->ref_count++;
 915                                                 vm_object_res_reference(object);
 916                                                 vm_fault_cleanup(object,
 917                                                                  first_m);
 918                                                 counter(c_vm_fault_page_block_backoff_kernel++);
 919                                                 vm_object_lock(object);
 920                                                 assert(object->ref_count > 0);
 921                                                 if (!object->pager_ready) {
 922                                                         wait_result = vm_object_assert_wait(
 923                                                                 object,
 924                                                                 VM_OBJECT_EVENT_PAGER_READY,
 925                                                                 interruptible);
 926                                                         vm_object_unlock(object);
 927                                                         if (wait_result == THREAD_WAITING)
 928                                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
 929                                                         vm_object_deallocate(object);
 930                                                         goto backoff;
 931                                                 } else {
 932                                                         vm_object_unlock(object);
 933                                                         vm_object_deallocate(object);
 934                                                         thread_interrupt_level(interruptible_state);
 935                                                         return VM_FAULT_RETRY;
 936                                                 }
 937                                         }
 938
 939                                         new_unlock_request = m->unlock_request =
 940                                                 (access_required | m->unlock_request);
 941                                         vm_object_unlock(object);
 942                                         XPR(XPR_VM_FAULT,
 943                                             "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
 944                                         (integer_t)object, offset,
 945                                         (integer_t)m, new_unlock_request, 0);
 946                                         if ((rc = memory_object_data_unlock(
 947                                                 object->pager,
 948                                                 offset + object->paging_offset,
 949                                                 PAGE_SIZE,
 950                                                 new_unlock_request))
 951                                              != KERN_SUCCESS) {
 952                                                 if (vm_fault_debug)
 953                                                     printf("vm_fault: memory_object_data_unlock failed\n");
 954                                                 vm_object_lock(object);
 955                                                 vm_fault_cleanup(object, first_m);
 956                                                 thread_interrupt_level(interruptible_state);
 957                                                 return((rc == MACH_SEND_INTERRUPTED) ?
 958                                                         VM_FAULT_INTERRUPTED :
 959                                                         VM_FAULT_MEMORY_ERROR);
 960                                         }
 961                                         vm_object_lock(object);
 962                                         continue;
 963                                 }
 964
 965                                 XPR(XPR_VM_FAULT,
 966         "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
 967                                         access_required, (integer_t)object,
 968                                         offset, (integer_t)m, 0);
 969                                 /* take an extra ref so object won't die */
 970                                 assert(object->ref_count > 0);
 971                                 object->ref_count++;
 972                                 vm_object_res_reference(object);
 973                                 vm_fault_cleanup(object, first_m);
 974                                 counter(c_vm_fault_page_block_backoff_kernel++);
 975                                 vm_object_lock(object);
 976                                 assert(object->ref_count > 0);
 977                                 m = vm_page_lookup(object, offset);
 978                                 if (m != VM_PAGE_NULL &&
 979                                     (access_required & m->page_lock) &&
 980                                     !((access_required & m->unlock_request) != access_required)) {
 981                                         PAGE_ASSERT_WAIT(m, interruptible);
 982                                         vm_object_unlock(object);
 983                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
 984                                         vm_object_deallocate(object);
 985                                         goto backoff;
 986                                 } else {
 987                                         vm_object_unlock(object);
 988                                         vm_object_deallocate(object);
 989                                         thread_interrupt_level(interruptible_state);
 990                                         return VM_FAULT_RETRY;
 991                                 }
 992                         }
 993                         /*
 994                          *      We mark the page busy and leave it on
 995                          *      the pageout queues.  If the pageout
 996                          *      deamon comes across it, then it will
 997                          *      remove the page.
 998                          */
 999
1000 #if TRACEFAULTPAGE
1001                         dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1002 #endif
1003
1004 #if     !VM_FAULT_STATIC_CONFIG
1005                         if (!software_reference_bits) {
1006                                 vm_page_lock_queues();
1007                                 if (m->inactive)
1008                                         vm_stat.reactivations++;
1009
1010                                 VM_PAGE_QUEUES_REMOVE(m);
1011                                 vm_page_unlock_queues();
1012                         }
1013 #endif
1014                         XPR(XPR_VM_FAULT,
1015                             "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1016                                 (integer_t)object, offset, (integer_t)m, 0, 0);
1017                         assert(!m->busy);
1018                         m->busy = TRUE;
1019                         assert(!m->absent);
1020                         break;
1021                 }
1022
1023                 look_for_page =
1024                         (object->pager_created) &&
1025                           LOOK_FOR(object, offset) &&
1026                             (!data_supply);
1027
1028 #if TRACEFAULTPAGE
1029                 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);      /* (TEST/DEBUG) */
1030 #endif
1031                 if ((look_for_page || (object == first_object))
1032                                 && !must_be_resident
1033                                 && !(object->phys_contiguous))  {
1034                         /*
1035                          *      Allocate a new page for this object/offset
1036                          *      pair.
1037                          */
1038
1039                         m = vm_page_grab_fictitious();
1040 #if TRACEFAULTPAGE
1041                         dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */
1042 #endif
1043                         if (m == VM_PAGE_NULL) {
1044                                 vm_fault_cleanup(object, first_m);
1045                                 thread_interrupt_level(interruptible_state);
1046                                 return(VM_FAULT_FICTITIOUS_SHORTAGE);
1047                         }
1048                         vm_page_insert(m, object, offset);
1049                 }
1050
1051                 if ((look_for_page && !must_be_resident)) {
1052                         kern_return_t   rc;
1053
1054                         /*
1055                          *      If the memory manager is not ready, we
1056                          *      cannot make requests.
1057                          */
1058                         if (!object->pager_ready) {
1059 #if TRACEFAULTPAGE
1060                                 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */
1061 #endif
1062                                 if(m != VM_PAGE_NULL)
1063                                         VM_PAGE_FREE(m);
1064                                 XPR(XPR_VM_FAULT,
1065                                 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1066                                         (integer_t)object, offset, 0, 0, 0);
1067                                 /* take an extra ref so object won't die */
1068                                 assert(object->ref_count > 0);
1069                                 object->ref_count++;
1070                                 vm_object_res_reference(object);
1071                                 vm_fault_cleanup(object, first_m);
1072                                 counter(c_vm_fault_page_block_backoff_kernel++);
1073                                 vm_object_lock(object);
1074                                 assert(object->ref_count > 0);
1075                                 if (!object->pager_ready) {
1076                                         wait_result = vm_object_assert_wait(object,
1077                                                               VM_OBJECT_EVENT_PAGER_READY,
1078                                                               interruptible);
1079                                         vm_object_unlock(object);
1080                                         if (wait_result == THREAD_WAITING)
1081                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
1082                                         vm_object_deallocate(object);
1083                                         goto backoff;
1084                                 } else {
1085                                         vm_object_unlock(object);
1086                                         vm_object_deallocate(object);
1087                                         thread_interrupt_level(interruptible_state);
1088                                         return VM_FAULT_RETRY;
1089                                 }
1090                         }
1091
1092                         if(object->phys_contiguous) {
1093                                 if(m != VM_PAGE_NULL) {
1094                                         VM_PAGE_FREE(m);
1095                                         m = VM_PAGE_NULL;
1096                                 }
1097                                 goto no_clustering;
1098                         }
1099                         if (object->internal) {
1100                                 /*
1101                                  *      Requests to the default pager
1102                                  *      must reserve a real page in advance,
1103                                  *      because the pager's data-provided
1104                                  *      won't block for pages.  IMPORTANT:
1105                                  *      this acts as a throttling mechanism
1106                                  *      for data_requests to the default
1107                                  *      pager.
1108                                  */
1109
1110 #if TRACEFAULTPAGE
1111                                 dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1112 #endif
1113                                 if (m->fictitious && !vm_page_convert(m)) {
1114                                         VM_PAGE_FREE(m);
1115                                         vm_fault_cleanup(object, first_m);
1116                                         thread_interrupt_level(interruptible_state);
1117                                         return(VM_FAULT_MEMORY_SHORTAGE);
1118                                 }
1119                         } else if (object->absent_count >
1120                                                 vm_object_absent_max) {
1121                                 /*
1122                                  *      If there are too many outstanding page
1123                                  *      requests pending on this object, we
1124                                  *      wait for them to be resolved now.
1125                                  */
1126
1127 #if TRACEFAULTPAGE
1128                                 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
1129 #endif
1130                                 if(m != VM_PAGE_NULL)
1131                                         VM_PAGE_FREE(m);
1132                                 /* take an extra ref so object won't die */
1133                                 assert(object->ref_count > 0);
1134                                 object->ref_count++;
1135                                 vm_object_res_reference(object);
1136                                 vm_fault_cleanup(object, first_m);
1137                                 counter(c_vm_fault_page_block_backoff_kernel++);
1138                                 vm_object_lock(object);
1139                                 assert(object->ref_count > 0);
1140                                 if (object->absent_count > vm_object_absent_max) {
1141                                         vm_object_absent_assert_wait(object,
1142                                                                      interruptible);
1143                                         vm_object_unlock(object);
1144                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1145                                         vm_object_deallocate(object);
1146                                         goto backoff;
1147                                 } else {
1148                                         vm_object_unlock(object);
1149                                         vm_object_deallocate(object);
1150                                         thread_interrupt_level(interruptible_state);
1151                                         return VM_FAULT_RETRY;
1152                                 }
1153                         }
1154
1155                         /*
1156                          *      Indicate that the page is waiting for data
1157                          *      from the memory manager.
1158                          */
1159
1160                         if(m != VM_PAGE_NULL) {
1161
1162                                 m->list_req_pending = TRUE;
1163                                 m->absent = TRUE;
1164                                 m->unusual = TRUE;
1165                                 object->absent_count++;
1166
1167                         }
1168
1169 no_clustering:
1170                         cluster_start = offset;
1171                         length = PAGE_SIZE;
1172
1173                         /*
1174                          * lengthen the cluster by the pages in the working set
1175                          */
1176                         if((map != NULL) &&
1177                                 (current_task()->dynamic_working_set != 0)) {
1178                                 cluster_end = cluster_start + length;
1179                                 /* tws values for start and end are just a
1180                                  * suggestions.  Therefore, as long as
1181                                  * build_cluster does not use pointers or
1182                                  * take action based on values that
1183                                  * could be affected by re-entrance we
1184                                  * do not need to take the map lock.
1185                                  */
1186                                 cluster_end = offset + PAGE_SIZE_64;
1187                                 tws_build_cluster((tws_hash_t)
1188                                         current_task()->dynamic_working_set,
1189                                         object, &cluster_start,
1190                                         &cluster_end, 0x40000);
1191                                 length = cluster_end - cluster_start;
1192                         }
1193 #if TRACEFAULTPAGE
1194                         dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);  /* (TEST/DEBUG) */
1195 #endif
1196                         /*
1197                          *      We have a busy page, so we can
1198                          *      release the object lock.
1199                          */
1200                         vm_object_unlock(object);
1201
1202                         /*
1203                          *      Call the memory manager to retrieve the data.
1204                          */
1205
1206                         if (type_of_fault)
1207                                 *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT;
1208                         VM_STAT(pageins++);
1209                         current_task()->pageins++;
1210                         bumped_pagein = TRUE;
1211
1212                         /*
1213                          *      If this object uses a copy_call strategy,
1214                          *      and we are interested in a copy of this object
1215                          *      (having gotten here only by following a
1216                          *      shadow chain), then tell the memory manager
1217                          *      via a flag added to the desired_access
1218                          *      parameter, so that it can detect a race
1219                          *      between our walking down the shadow chain
1220                          *      and its pushing pages up into a copy of
1221                          *      the object that it manages.
1222                          */
1223
1224                         if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1225                             object != first_object) {
1226                                 wants_copy_flag = VM_PROT_WANTS_COPY;
1227                         } else {
1228                                 wants_copy_flag = VM_PROT_NONE;
1229                         }
1230
1231                         XPR(XPR_VM_FAULT,
1232                             "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1233                                 (integer_t)object, offset, (integer_t)m,
1234                                 access_required | wants_copy_flag, 0);
1235
1236                         rc = memory_object_data_request(object->pager,
1237                                         cluster_start + object->paging_offset,
1238                                         length,
1239                                         access_required | wants_copy_flag);
1240
1241
1242 #if TRACEFAULTPAGE
1243                         dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1244 #endif
1245                         if (rc != KERN_SUCCESS) {
1246                                 if (rc != MACH_SEND_INTERRUPTED
1247                                     && vm_fault_debug)
1248                                         printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
1249                                                 "memory_object_data_request",
1250                                                 object->pager,
1251                                                 cluster_start + object->paging_offset,
1252                                                 length, access_required, rc);
1253                                 /*
1254                                  *      Don't want to leave a busy page around,
1255                                  *      but the data request may have blocked,
1256                                  *      so check if it's still there and busy.
1257                                  */
1258                                 if(!object->phys_contiguous) {
1259                                    vm_object_lock(object);
1260                                    for (; length; length -= PAGE_SIZE,
1261                                       cluster_start += PAGE_SIZE_64) {
1262                                       vm_page_t p;
1263                                       if ((p = vm_page_lookup(object,
1264                                                                 cluster_start))
1265                                             && p->absent && p->busy
1266                                             && p != first_m) {
1267                                          VM_PAGE_FREE(p);
1268                                       }
1269                                    }
1270                                 }
1271                                 vm_fault_cleanup(object, first_m);
1272                                 thread_interrupt_level(interruptible_state);
1273                                 return((rc == MACH_SEND_INTERRUPTED) ?
1274                                         VM_FAULT_INTERRUPTED :
1275                                         VM_FAULT_MEMORY_ERROR);
1276                         } else {
1277 #ifdef notdefcdy
1278                                 tws_hash_line_t line;
1279                                 task_t          task;
1280
1281                                 task = current_task();
1282
1283                                 if((map != NULL) &&
1284                                         (task->dynamic_working_set != 0))
1285                                                 && !(object->private)) {
1286                                         vm_object_t     base_object;
1287                                         vm_object_offset_t base_offset;
1288                                         base_object = object;
1289                                         base_offset = offset;
1290                                         while(base_object->shadow) {
1291                                                 base_offset +=
1292                                                   base_object->shadow_offset;
1293                                                 base_object =
1294                                                   base_object->shadow;
1295                                         }
1296                                         if(tws_lookup
1297                                                 ((tws_hash_t)
1298                                                 task->dynamic_working_set,
1299                                                 base_offset, base_object,
1300                                                 &line) == KERN_SUCCESS) {
1301                                                 tws_line_signal((tws_hash_t)
1302                                                 task->dynamic_working_set,
1303                                                         map, line, vaddr);
1304                                         }
1305                                 }
1306 #endif
1307                         }
1308
1309                         /*
1310                          * Retry with same object/offset, since new data may
1311                          * be in a different page (i.e., m is meaningless at
1312                          * this point).
1313                          */
1314                         vm_object_lock(object);
1315                         if ((interruptible != THREAD_UNINT) &&
1316                             (current_thread()->state & TH_ABORT)) {
1317                                 vm_fault_cleanup(object, first_m);
1318                                 thread_interrupt_level(interruptible_state);
1319                                 return(VM_FAULT_INTERRUPTED);
1320                         }
1321                         if(m == VM_PAGE_NULL)
1322                                 break;
1323                         continue;
1324                 }
1325
1326                 /*
1327                  * The only case in which we get here is if
1328                  * object has no pager (or unwiring).  If the pager doesn't
1329                  * have the page this is handled in the m->absent case above
1330                  * (and if you change things here you should look above).
1331                  */
1332 #if TRACEFAULTPAGE
1333                 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1334 #endif
1335                 if (object == first_object)
1336                         first_m = m;
1337                 else
1338                         assert(m == VM_PAGE_NULL);
1339
1340                 XPR(XPR_VM_FAULT,
1341                     "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1342                         (integer_t)object, offset, (integer_t)m,
1343                         (integer_t)object->shadow, 0);
1344                 /*
1345                  *      Move on to the next object.  Lock the next
1346                  *      object before unlocking the current one.
1347                  */
1348                 next_object = object->shadow;
1349                 if (next_object == VM_OBJECT_NULL) {
1350                         assert(!must_be_resident);
1351                         /*
1352                          *      If there's no object left, fill the page
1353                          *      in the top object with zeros.  But first we
1354                          *      need to allocate a real page.
1355                          */
1356
1357                         if (object != first_object) {
1358                                 vm_object_paging_end(object);
1359                                 vm_object_unlock(object);
1360
1361                                 object = first_object;
1362                                 offset = first_offset;
1363                                 vm_object_lock(object);
1364                         }
1365
1366                         m = first_m;
1367                         assert(m->object == object);
1368                         first_m = VM_PAGE_NULL;
1369
1370                         if(m == VM_PAGE_NULL) {
1371                                 m = vm_page_grab();
1372                                 if (m == VM_PAGE_NULL) {
1373                                         vm_fault_cleanup(
1374                                                 object, VM_PAGE_NULL);
1375                                         thread_interrupt_level(
1376                                                 interruptible_state);
1377                                         return(VM_FAULT_MEMORY_SHORTAGE);
1378                                 }
1379                                 vm_page_insert(
1380                                         m, object, offset);
1381                         }
1382
1383                         if (object->shadow_severed) {
1384                                 VM_PAGE_FREE(m);
1385                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1386                                 thread_interrupt_level(interruptible_state);
1387                                 return VM_FAULT_MEMORY_ERROR;
1388                         }
1389
1390                         /*
1391                          * are we protecting the system from
1392                          * backing store exhaustion.  If so
1393                          * sleep unless we are privileged.
1394                          */
1395
1396                         if(vm_backing_store_low) {
1397                                 if(!(current_task()->priv_flags
1398                                                 & VM_BACKING_STORE_PRIV)) {
1399                                         assert_wait((event_t)
1400                                                 &vm_backing_store_low,
1401                                                 THREAD_UNINT);
1402                                         VM_PAGE_FREE(m);
1403                                         vm_fault_cleanup(object, VM_PAGE_NULL);
1404                                         thread_block((void (*)(void)) 0);
1405                                         thread_interrupt_level(
1406                                                 interruptible_state);
1407                                         return(VM_FAULT_RETRY);
1408                                 }
1409                         }
1410
1411                         if (VM_PAGE_THROTTLED() ||
1412                             (m->fictitious && !vm_page_convert(m))) {
1413                                 VM_PAGE_FREE(m);
1414                                 vm_fault_cleanup(object, VM_PAGE_NULL);
1415                                 thread_interrupt_level(interruptible_state);
1416                                 return(VM_FAULT_MEMORY_SHORTAGE);
1417                         }
1418                         m->no_isync = FALSE;
1419
1420                         if (!no_zero_fill) {
1421                                 vm_object_unlock(object);
1422                                 vm_page_zero_fill(m);
1423                                 vm_object_lock(object);
1424                         }
1425                         if (type_of_fault)
1426                                 *type_of_fault = DBG_ZERO_FILL_FAULT;
1427                         VM_STAT(zero_fill_count++);
1428
1429                         if (bumped_pagein == TRUE) {
1430                                 VM_STAT(pageins--);
1431                                 current_task()->pageins--;
1432                         }
1433
1434                         vm_page_lock_queues();
1435                         VM_PAGE_QUEUES_REMOVE(m);
1436                         if(m->object->size > 0x80000) {
1437                                 m->zero_fill = TRUE;
1438                                 /* depends on the queues lock */
1439                                 vm_zf_count += 1;
1440                                 queue_enter(&vm_page_queue_zf,
1441                                         m, vm_page_t, pageq);
1442                         } else {
1443                                 queue_enter(
1444                                         &vm_page_queue_inactive,
1445                                         m, vm_page_t, pageq);
1446                         }
1447                         m->page_ticket = vm_page_ticket;
1448                         vm_page_ticket_roll++;
1449                         if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1450                                 vm_page_ticket_roll = 0;
1451                                 if(vm_page_ticket ==
1452                                         VM_PAGE_TICKET_ROLL_IDS)
1453                                         vm_page_ticket= 0;
1454                                 else
1455                                         vm_page_ticket++;
1456                         }
1457                         m->inactive = TRUE;
1458                         vm_page_inactive_count++;
1459                         vm_page_unlock_queues();
1460 #if 0
1461                         pmap_clear_modify(m->phys_page);
1462 #endif
1463                         break;
1464                 }
1465                 else {
1466                         if ((object != first_object) || must_be_resident)
1467                                 vm_object_paging_end(object);
1468                         offset += object->shadow_offset;
1469                         hi_offset += object->shadow_offset;
1470                         lo_offset += object->shadow_offset;
1471                         access_required = VM_PROT_READ;
1472                         vm_object_lock(next_object);
1473                         vm_object_unlock(object);
1474                         object = next_object;
1475                         vm_object_paging_begin(object);
1476                 }
1477         }
1478
1479         /*
1480          *      PAGE HAS BEEN FOUND.
1481          *
1482          *      This page (m) is:
1483          *              busy, so that we can play with it;
1484          *              not absent, so that nobody else will fill it;
1485          *              possibly eligible for pageout;
1486          *
1487          *      The top-level page (first_m) is:
1488          *              VM_PAGE_NULL if the page was found in the
1489          *               top-level object;
1490          *              busy, not absent, and ineligible for pageout.
1491          *
1492          *      The current object (object) is locked.  A paging
1493          *      reference is held for the current and top-level
1494          *      objects.
1495          */
1496
1497 #if TRACEFAULTPAGE
1498         dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */
1499 #endif
1500 #if     EXTRA_ASSERTIONS
1501         if(m != VM_PAGE_NULL) {
1502                 assert(m->busy && !m->absent);
1503                 assert((first_m == VM_PAGE_NULL) ||
1504                         (first_m->busy && !first_m->absent &&
1505                          !first_m->active && !first_m->inactive));
1506         }
1507 #endif  /* EXTRA_ASSERTIONS */
1508
1509         XPR(XPR_VM_FAULT,
1510        "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1511                 (integer_t)object, offset, (integer_t)m,
1512                 (integer_t)first_object, (integer_t)first_m);
1513         /*
1514          *      If the page is being written, but isn't
1515          *      already owned by the top-level object,
1516          *      we have to copy it into a new page owned
1517          *      by the top-level object.
1518          */
1519
1520         if ((object != first_object) && (m != VM_PAGE_NULL)) {
1521                 /*
1522                  *      We only really need to copy if we
1523                  *      want to write it.
1524                  */
1525
1526 #if TRACEFAULTPAGE
1527                         dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1528 #endif
1529                 if (fault_type & VM_PROT_WRITE) {
1530                         vm_page_t copy_m;
1531
1532                         assert(!must_be_resident);
1533
1534                         /*
1535                          * are we protecting the system from
1536                          * backing store exhaustion.  If so
1537                          * sleep unless we are privileged.
1538                          */
1539
1540                         if(vm_backing_store_low) {
1541                                 if(!(current_task()->priv_flags
1542                                                 & VM_BACKING_STORE_PRIV)) {
1543                                         assert_wait((event_t)
1544                                                 &vm_backing_store_low,
1545                                                 THREAD_UNINT);
1546                                         RELEASE_PAGE(m);
1547                                         vm_fault_cleanup(object, first_m);
1548                                         thread_block((void (*)(void)) 0);
1549                                         thread_interrupt_level(
1550                                                 interruptible_state);
1551                                         return(VM_FAULT_RETRY);
1552                                 }
1553                         }
1554
1555                         /*
1556                          *      If we try to collapse first_object at this
1557                          *      point, we may deadlock when we try to get
1558                          *      the lock on an intermediate object (since we
1559                          *      have the bottom object locked).  We can't
1560                          *      unlock the bottom object, because the page
1561                          *      we found may move (by collapse) if we do.
1562                          *
1563                          *      Instead, we first copy the page.  Then, when
1564                          *      we have no more use for the bottom object,
1565                          *      we unlock it and try to collapse.
1566                          *
1567                          *      Note that we copy the page even if we didn't
1568                          *      need to... that's the breaks.
1569                          */
1570
1571                         /*
1572                          *      Allocate a page for the copy
1573                          */
1574                         copy_m = vm_page_grab();
1575                         if (copy_m == VM_PAGE_NULL) {
1576                                 RELEASE_PAGE(m);
1577                                 vm_fault_cleanup(object, first_m);
1578                                 thread_interrupt_level(interruptible_state);
1579                                 return(VM_FAULT_MEMORY_SHORTAGE);
1580                         }
1581
1582
1583                         XPR(XPR_VM_FAULT,
1584                             "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1585                                 (integer_t)object, offset,
1586                                 (integer_t)m, (integer_t)copy_m, 0);
1587                         vm_page_copy(m, copy_m);
1588
1589                         /*
1590                          *      If another map is truly sharing this
1591                          *      page with us, we have to flush all
1592                          *      uses of the original page, since we
1593                          *      can't distinguish those which want the
1594                          *      original from those which need the
1595                          *      new copy.
1596                          *
1597                          *      XXXO If we know that only one map has
1598                          *      access to this page, then we could
1599                          *      avoid the pmap_page_protect() call.
1600                          */
1601
1602                         vm_page_lock_queues();
1603                         assert(!m->cleaning);
1604                         pmap_page_protect(m->phys_page, VM_PROT_NONE);
1605                         vm_page_deactivate(m);
1606                         copy_m->dirty = TRUE;
1607                         /*
1608                          * Setting reference here prevents this fault from
1609                          * being counted as a (per-thread) reactivate as well
1610                          * as a copy-on-write.
1611                          */
1612                         first_m->reference = TRUE;
1613                         vm_page_unlock_queues();
1614
1615                         /*
1616                          *      We no longer need the old page or object.
1617                          */
1618
1619                         PAGE_WAKEUP_DONE(m);
1620                         vm_object_paging_end(object);
1621                         vm_object_unlock(object);
1622
1623                         if (type_of_fault)
1624                                 *type_of_fault = DBG_COW_FAULT;
1625                         VM_STAT(cow_faults++);
1626                         current_task()->cow_faults++;
1627                         object = first_object;
1628                         offset = first_offset;
1629
1630                         vm_object_lock(object);
1631                         VM_PAGE_FREE(first_m);
1632                         first_m = VM_PAGE_NULL;
1633                         assert(copy_m->busy);
1634                         vm_page_insert(copy_m, object, offset);
1635                         m = copy_m;
1636
1637                         /*
1638                          *      Now that we've gotten the copy out of the
1639                          *      way, let's try to collapse the top object.
1640                          *      But we have to play ugly games with
1641                          *      paging_in_progress to do that...
1642                          */
1643
1644                         vm_object_paging_end(object);
1645                         vm_object_collapse(object, offset);
1646                         vm_object_paging_begin(object);
1647
1648                 }
1649                 else {
1650                         *protection &= (~VM_PROT_WRITE);
1651                 }
1652         }
1653
1654         /*
1655          *      Now check whether the page needs to be pushed into the
1656          *      copy object.  The use of asymmetric copy on write for
1657          *      shared temporary objects means that we may do two copies to
1658          *      satisfy the fault; one above to get the page from a
1659          *      shadowed object, and one here to push it into the copy.
1660          */
1661
1662         while ((copy_object = first_object->copy) != VM_OBJECT_NULL &&
1663                    (m!= VM_PAGE_NULL)) {
1664                 vm_object_offset_t      copy_offset;
1665                 vm_page_t               copy_m;
1666
1667 #if TRACEFAULTPAGE
1668                 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);    /* (TEST/DEBUG) */
1669 #endif
1670                 /*
1671                  *      If the page is being written, but hasn't been
1672                  *      copied to the copy-object, we have to copy it there.
1673                  */
1674
1675                 if ((fault_type & VM_PROT_WRITE) == 0) {
1676                         *protection &= ~VM_PROT_WRITE;
1677                         break;
1678                 }
1679
1680                 /*
1681                  *      If the page was guaranteed to be resident,
1682                  *      we must have already performed the copy.
1683                  */
1684
1685                 if (must_be_resident)
1686                         break;
1687
1688                 /*
1689                  *      Try to get the lock on the copy_object.
1690                  */
1691                 if (!vm_object_lock_try(copy_object)) {
1692                         vm_object_unlock(object);
1693
1694                         mutex_pause();  /* wait a bit */
1695
1696                         vm_object_lock(object);
1697                         continue;
1698                 }
1699
1700                 /*
1701                  *      Make another reference to the copy-object,
1702                  *      to keep it from disappearing during the
1703                  *      copy.
1704                  */
1705                 assert(copy_object->ref_count > 0);
1706                 copy_object->ref_count++;
1707                 VM_OBJ_RES_INCR(copy_object);
1708
1709                 /*
1710                  *      Does the page exist in the copy?
1711                  */
1712                 copy_offset = first_offset - copy_object->shadow_offset;
1713                 if (copy_object->size <= copy_offset)
1714                         /*
1715                          * Copy object doesn't cover this page -- do nothing.
1716                          */
1717                         ;
1718                 else if ((copy_m =
1719                         vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1720                         /* Page currently exists in the copy object */
1721                         if (copy_m->busy) {
1722                                 /*
1723                                  *      If the page is being brought
1724                                  *      in, wait for it and then retry.
1725                                  */
1726                                 RELEASE_PAGE(m);
1727                                 /* take an extra ref so object won't die */
1728                                 assert(copy_object->ref_count > 0);
1729                                 copy_object->ref_count++;
1730                                 vm_object_res_reference(copy_object);
1731                                 vm_object_unlock(copy_object);
1732                                 vm_fault_cleanup(object, first_m);
1733                                 counter(c_vm_fault_page_block_backoff_kernel++);
1734                                 vm_object_lock(copy_object);
1735                                 assert(copy_object->ref_count > 0);
1736                                 VM_OBJ_RES_DECR(copy_object);
1737                                 copy_object->ref_count--;
1738                                 assert(copy_object->ref_count > 0);
1739                                 copy_m = vm_page_lookup(copy_object, copy_offset);
1740                                 if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1741                                         PAGE_ASSERT_WAIT(copy_m, interruptible);
1742                                         vm_object_unlock(copy_object);
1743                                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1744                                         vm_object_deallocate(copy_object);
1745                                         goto backoff;
1746                                 } else {
1747                                         vm_object_unlock(copy_object);
1748                                         vm_object_deallocate(copy_object);
1749                                         thread_interrupt_level(interruptible_state);
1750                                         return VM_FAULT_RETRY;
1751                                 }
1752                         }
1753                 }
1754                 else if (!PAGED_OUT(copy_object, copy_offset)) {
1755                         /*
1756                          * If PAGED_OUT is TRUE, then the page used to exist
1757                          * in the copy-object, and has already been paged out.
1758                          * We don't need to repeat this. If PAGED_OUT is
1759                          * FALSE, then either we don't know (!pager_created,
1760                          * for example) or it hasn't been paged out.
1761                          * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1762                          * We must copy the page to the copy object.
1763                          */
1764
1765                         /*
1766                          * are we protecting the system from
1767                          * backing store exhaustion.  If so
1768                          * sleep unless we are privileged.
1769                          */
1770
1771                         if(vm_backing_store_low) {
1772                                 if(!(current_task()->priv_flags
1773                                                 & VM_BACKING_STORE_PRIV)) {
1774                                         assert_wait((event_t)
1775                                                 &vm_backing_store_low,
1776                                                 THREAD_UNINT);
1777                                         RELEASE_PAGE(m);
1778                                         VM_OBJ_RES_DECR(copy_object);
1779                                         copy_object->ref_count--;
1780                                         assert(copy_object->ref_count > 0);
1781                                         vm_object_unlock(copy_object);
1782                                         vm_fault_cleanup(object, first_m);
1783                                         thread_block((void (*)(void)) 0);
1784                                         thread_interrupt_level(
1785                                                 interruptible_state);
1786                                         return(VM_FAULT_RETRY);
1787                                 }
1788                         }
1789
1790                         /*
1791                          *      Allocate a page for the copy
1792                          */
1793                         copy_m = vm_page_alloc(copy_object, copy_offset);
1794                         if (copy_m == VM_PAGE_NULL) {
1795                                 RELEASE_PAGE(m);
1796                                 VM_OBJ_RES_DECR(copy_object);
1797                                 copy_object->ref_count--;
1798                                 assert(copy_object->ref_count > 0);
1799                                 vm_object_unlock(copy_object);
1800                                 vm_fault_cleanup(object, first_m);
1801                                 thread_interrupt_level(interruptible_state);
1802                                 return(VM_FAULT_MEMORY_SHORTAGE);
1803                         }
1804
1805                         /*
1806                          *      Must copy page into copy-object.
1807                          */
1808
1809                         vm_page_copy(m, copy_m);
1810
1811                         /*
1812                          *      If the old page was in use by any users
1813                          *      of the copy-object, it must be removed
1814                          *      from all pmaps.  (We can't know which
1815                          *      pmaps use it.)
1816                          */
1817
1818                         vm_page_lock_queues();
1819                         assert(!m->cleaning);
1820                         pmap_page_protect(m->phys_page, VM_PROT_NONE);
1821                         copy_m->dirty = TRUE;
1822                         vm_page_unlock_queues();
1823
1824                         /*
1825                          *      If there's a pager, then immediately
1826                          *      page out this page, using the "initialize"
1827                          *      option.  Else, we use the copy.
1828                          */
1829
1830                         if
1831 #if     MACH_PAGEMAP
1832                           ((!copy_object->pager_created) ||
1833                                 vm_external_state_get(
1834                                         copy_object->existence_map, copy_offset)
1835                                 == VM_EXTERNAL_STATE_ABSENT)
1836 #else
1837                           (!copy_object->pager_created)
1838 #endif
1839                                 {
1840                                 vm_page_lock_queues();
1841                                 vm_page_activate(copy_m);
1842                                 vm_page_unlock_queues();
1843                                 PAGE_WAKEUP_DONE(copy_m);
1844                         }
1845                         else {
1846                                 assert(copy_m->busy == TRUE);
1847
1848                                 /*
1849                                  *      The page is already ready for pageout:
1850                                  *      not on pageout queues and busy.
1851                                  *      Unlock everything except the
1852                                  *      copy_object itself.
1853                                  */
1854
1855                                 vm_object_unlock(object);
1856
1857                                 /*
1858                                  *      Write the page to the copy-object,
1859                                  *      flushing it from the kernel.
1860                                  */
1861
1862                                 vm_pageout_initialize_page(copy_m);
1863
1864                                 /*
1865                                  *      Since the pageout may have
1866                                  *      temporarily dropped the
1867                                  *      copy_object's lock, we
1868                                  *      check whether we'll have
1869                                  *      to deallocate the hard way.
1870                                  */
1871
1872                                 if ((copy_object->shadow != object) ||
1873                                     (copy_object->ref_count == 1)) {
1874                                         vm_object_unlock(copy_object);
1875                                         vm_object_deallocate(copy_object);
1876                                         vm_object_lock(object);
1877                                         continue;
1878                                 }
1879
1880                                 /*
1881                                  *      Pick back up the old object's
1882                                  *      lock.  [It is safe to do so,
1883                                  *      since it must be deeper in the
1884                                  *      object tree.]
1885                                  */
1886
1887                                 vm_object_lock(object);
1888                         }
1889
1890                         /*
1891                          *      Because we're pushing a page upward
1892                          *      in the object tree, we must restart
1893                          *      any faults that are waiting here.
1894                          *      [Note that this is an expansion of
1895                          *      PAGE_WAKEUP that uses the THREAD_RESTART
1896                          *      wait result].  Can't turn off the page's
1897                          *      busy bit because we're not done with it.
1898                          */
1899
1900                         if (m->wanted) {
1901                                 m->wanted = FALSE;
1902                                 thread_wakeup_with_result((event_t) m,
1903                                         THREAD_RESTART);
1904                         }
1905                 }
1906
1907                 /*
1908                  *      The reference count on copy_object must be
1909                  *      at least 2: one for our extra reference,
1910                  *      and at least one from the outside world
1911                  *      (we checked that when we last locked
1912                  *      copy_object).
1913                  */
1914                 copy_object->ref_count--;
1915                 assert(copy_object->ref_count > 0);
1916                 VM_OBJ_RES_DECR(copy_object);
1917                 vm_object_unlock(copy_object);
1918
1919                 break;
1920         }
1921
1922         *result_page = m;
1923         *top_page = first_m;
1924
1925         XPR(XPR_VM_FAULT,
1926                 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1927                 (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1928         /*
1929          *      If the page can be written, assume that it will be.
1930          *      [Earlier, we restrict the permission to allow write
1931          *      access only if the fault so required, so we don't
1932          *      mark read-only data as dirty.]
1933          */
1934
1935
1936         if(m != VM_PAGE_NULL) {
1937 #if     !VM_FAULT_STATIC_CONFIG
1938                 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1939                         m->dirty = TRUE;
1940 #endif
1941                 if (vm_page_deactivate_behind)
1942                         vm_fault_deactivate_behind(object, offset, behavior);
1943         } else {
1944                 vm_object_unlock(object);
1945         }
1946         thread_interrupt_level(interruptible_state);
1947
1948 #if TRACEFAULTPAGE
1949         dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);       /* (TEST/DEBUG) */
1950 #endif
1951         return(VM_FAULT_SUCCESS);
1952
1953 #if 0
1954     block_and_backoff:
1955         vm_fault_cleanup(object, first_m);
1956
1957         counter(c_vm_fault_page_block_backoff_kernel++);
1958         thread_block(THREAD_CONTINUE_NULL);
1959 #endif
1960
1961     backoff:
1962         thread_interrupt_level(interruptible_state);
1963         if (wait_result == THREAD_INTERRUPTED)
1964                 return VM_FAULT_INTERRUPTED;
1965         return VM_FAULT_RETRY;
1966
1967 #undef  RELEASE_PAGE
1968 }
1969
1970 /*
1971  *      Routine:        vm_fault_tws_insert
1972  *      Purpose:
1973  *              Add fault information to the task working set.
1974  *      Implementation:
1975  *              We always insert the base object/offset pair
1976  *              rather the actual object/offset.
1977  *      Assumptions:
1978  *              Map and pmap_map locked.
1979  *              Object locked and referenced.
1980  *      Returns:
1981  *              TRUE if startup file should be written.
1982  *              With object locked and still referenced.
1983  *              But we may drop the object lock temporarily.
1984  */
1985 static boolean_t
1986 vm_fault_tws_insert(
1987         vm_map_t map,
1988         vm_map_t pmap_map,
1989         vm_offset_t vaddr,
1990         vm_object_t object,
1991         vm_object_offset_t offset)
1992 {
1993         tws_hash_line_t line;
1994         task_t          task;
1995         kern_return_t   kr;
1996         boolean_t       result = FALSE;
1997         extern vm_map_t kalloc_map;
1998
1999         /* Avoid possible map lock deadlock issues */
2000         if (map == kernel_map || map == kalloc_map ||
2001             pmap_map == kernel_map || pmap_map == kalloc_map)
2002                 return result;
2003
2004         task = current_task();
2005         if (task->dynamic_working_set != 0) {
2006                 vm_object_t     base_object;
2007                 vm_object_t     base_shadow;
2008                 vm_object_offset_t base_offset;
2009                 base_object = object;
2010                 base_offset = offset;
2011                 while(base_shadow = base_object->shadow) {
2012                         vm_object_lock(base_shadow);
2013                         vm_object_unlock(base_object);
2014                         base_offset +=
2015                          base_object->shadow_offset;
2016                         base_object = base_shadow;
2017                 }
2018                 kr = tws_lookup((tws_hash_t)
2019                         task->dynamic_working_set,
2020                         base_offset, base_object,
2021                         &line);
2022                 if (kr == KERN_OPERATION_TIMED_OUT){
2023                         result = TRUE;
2024                         if (base_object != object) {
2025                                 vm_object_unlock(base_object);
2026                                 vm_object_lock(object);
2027                         }
2028                 } else if (kr != KERN_SUCCESS) {
2029                         if(base_object != object)
2030                                 vm_object_reference_locked(base_object);
2031                         kr = tws_insert((tws_hash_t)
2032                                    task->dynamic_working_set,
2033                                    base_offset, base_object,
2034                                    vaddr, pmap_map);
2035                         if(base_object != object) {
2036                                 vm_object_unlock(base_object);
2037                                 vm_object_deallocate(base_object);
2038                         }
2039                         if(kr == KERN_NO_SPACE) {
2040                                 if (base_object == object)
2041                                         vm_object_unlock(object);
2042                                 tws_expand_working_set(
2043                                    task->dynamic_working_set,
2044                                    TWS_HASH_LINE_COUNT,
2045                                    FALSE);
2046                                 if (base_object == object)
2047                                         vm_object_lock(object);
2048                         } else if(kr == KERN_OPERATION_TIMED_OUT) {
2049                                 result = TRUE;
2050                         }
2051                         if(base_object != object)
2052                                 vm_object_lock(object);
2053                 } else if (base_object != object) {
2054                         vm_object_unlock(base_object);
2055                         vm_object_lock(object);
2056                 }
2057         }
2058         return result;
2059 }
2060
2061 /*
2062  *      Routine:        vm_fault
2063  *      Purpose:
2064  *              Handle page faults, including pseudo-faults
2065  *              used to change the wiring status of pages.
2066  *      Returns:
2067  *              Explicit continuations have been removed.
2068  *      Implementation:
2069  *              vm_fault and vm_fault_page save mucho state
2070  *              in the moral equivalent of a closure.  The state
2071  *              structure is allocated when first entering vm_fault
2072  *              and deallocated when leaving vm_fault.
2073  */
2074
2075 kern_return_t
2076 vm_fault(
2077         vm_map_t        map,
2078         vm_offset_t     vaddr,
2079         vm_prot_t       fault_type,
2080         boolean_t       change_wiring,
2081         int             interruptible,
2082         pmap_t          caller_pmap,
2083         vm_offset_t     caller_pmap_addr)
2084 {
2085         vm_map_version_t        version;        /* Map version for verificiation */
2086         boolean_t               wired;          /* Should mapping be wired down? */
2087         vm_object_t             object;         /* Top-level object */
2088         vm_object_offset_t      offset;         /* Top-level offset */
2089         vm_prot_t               prot;           /* Protection for mapping */
2090         vm_behavior_t           behavior;       /* Expected paging behavior */
2091         vm_object_offset_t      lo_offset, hi_offset;
2092         vm_object_t             old_copy_object; /* Saved copy object */
2093         vm_page_t               result_page;    /* Result of vm_fault_page */
2094         vm_page_t               top_page;       /* Placeholder page */
2095         kern_return_t           kr;
2096
2097         register
2098         vm_page_t               m;      /* Fast access to result_page */
2099         kern_return_t           error_code;     /* page error reasons */
2100         register
2101         vm_object_t             cur_object;
2102         register
2103         vm_object_offset_t      cur_offset;
2104         vm_page_t               cur_m;
2105         vm_object_t             new_object;
2106         int                     type_of_fault;
2107         vm_map_t                pmap_map = map;
2108         vm_map_t                original_map = map;
2109         pmap_t                  pmap = NULL;
2110         boolean_t               funnel_set = FALSE;
2111         funnel_t                *curflock;
2112         thread_t                cur_thread;
2113         boolean_t               interruptible_state;
2114         unsigned int            cache_attr;
2115         int                     write_startup_file = 0;
2116         vm_prot_t               full_fault_type;
2117
2118         if (get_preemption_level() != 0)
2119                 return (KERN_FAILURE);
2120
2121         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
2122                               vaddr,
2123                               0,
2124                               0,
2125                               0,
2126                               0);
2127
2128         /* at present we do not fully check for execute permission */
2129         /* we generally treat it is read except in certain device  */
2130         /* memory settings */
2131         full_fault_type = fault_type;
2132         if(fault_type & VM_PROT_EXECUTE) {
2133                 fault_type &= ~VM_PROT_EXECUTE;
2134                 fault_type |= VM_PROT_READ;
2135         }
2136
2137         interruptible_state = thread_interrupt_level(interruptible);
2138
2139         /*
2140          * assume we will hit a page in the cache
2141          * otherwise, explicitly override with
2142          * the real fault type once we determine it
2143          */
2144         type_of_fault = DBG_CACHE_HIT_FAULT;
2145
2146         VM_STAT(faults++);
2147         current_task()->faults++;
2148
2149         /*
2150          * drop funnel if it is already held. Then restore while returning
2151          */
2152         cur_thread = current_thread();
2153
2154         if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
2155                 funnel_set = TRUE;
2156                 curflock = cur_thread->funnel_lock;
2157                 thread_funnel_set( curflock , FALSE);
2158         }
2159
2160     RetryFault: ;
2161
2162         /*
2163          *      Find the backing store object and offset into
2164          *      it to begin the search.
2165          */
2166         map = original_map;
2167         vm_map_lock_read(map);
2168         kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
2169                                 &object, &offset,
2170                                 &prot, &wired,
2171                                 &behavior, &lo_offset, &hi_offset, &pmap_map);
2172
2173         pmap = pmap_map->pmap;
2174
2175         if (kr != KERN_SUCCESS) {
2176                 vm_map_unlock_read(map);
2177                 goto done;
2178         }
2179
2180         /*
2181          *      If the page is wired, we must fault for the current protection
2182          *      value, to avoid further faults.
2183          */
2184
2185         if (wired)
2186                 fault_type = prot | VM_PROT_WRITE;
2187
2188 #if     VM_FAULT_CLASSIFY
2189         /*
2190          *      Temporary data gathering code
2191          */
2192         vm_fault_classify(object, offset, fault_type);
2193 #endif
2194         /*
2195          *      Fast fault code.  The basic idea is to do as much as
2196          *      possible while holding the map lock and object locks.
2197          *      Busy pages are not used until the object lock has to
2198          *      be dropped to do something (copy, zero fill, pmap enter).
2199          *      Similarly, paging references aren't acquired until that
2200          *      point, and object references aren't used.
2201          *
2202          *      If we can figure out what to do
2203          *      (zero fill, copy on write, pmap enter) while holding
2204          *      the locks, then it gets done.  Otherwise, we give up,
2205          *      and use the original fault path (which doesn't hold
2206          *      the map lock, and relies on busy pages).
2207          *      The give up cases include:
2208          *              - Have to talk to pager.
2209          *              - Page is busy, absent or in error.
2210          *              - Pager has locked out desired access.
2211          *              - Fault needs to be restarted.
2212          *              - Have to push page into copy object.
2213          *
2214          *      The code is an infinite loop that moves one level down
2215          *      the shadow chain each time.  cur_object and cur_offset
2216          *      refer to the current object being examined. object and offset
2217          *      are the original object from the map.  The loop is at the
2218          *      top level if and only if object and cur_object are the same.
2219          *
2220          *      Invariants:  Map lock is held throughout.  Lock is held on
2221          *              original object and cur_object (if different) when
2222          *              continuing or exiting loop.
2223          *
2224          */
2225
2226
2227         /*
2228          *      If this page is to be inserted in a copy delay object
2229          *      for writing, and if the object has a copy, then the
2230          *      copy delay strategy is implemented in the slow fault page.
2231          */
2232         if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2233             object->copy == VM_OBJECT_NULL ||
2234             (fault_type & VM_PROT_WRITE) == 0) {
2235         cur_object = object;
2236         cur_offset = offset;
2237
2238         while (TRUE) {
2239                 m = vm_page_lookup(cur_object, cur_offset);
2240                 if (m != VM_PAGE_NULL) {
2241                         if (m->busy) {
2242                                 wait_result_t   result;
2243
2244                                 if (object != cur_object)
2245                                         vm_object_unlock(object);
2246
2247                                 vm_map_unlock_read(map);
2248                                 if (pmap_map != map)
2249                                         vm_map_unlock(pmap_map);
2250
2251 #if     !VM_FAULT_STATIC_CONFIG
2252                                 if (!vm_fault_interruptible)
2253                                         interruptible = THREAD_UNINT;
2254 #endif
2255                                 result = PAGE_ASSERT_WAIT(m, interruptible);
2256
2257                                 vm_object_unlock(cur_object);
2258
2259                                 if (result == THREAD_WAITING) {
2260                                         result = thread_block(THREAD_CONTINUE_NULL);
2261
2262                                         counter(c_vm_fault_page_block_busy_kernel++);
2263                                 }
2264                                 if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2265                                         goto RetryFault;
2266
2267                                 kr = KERN_ABORTED;
2268                                 goto done;
2269                         }
2270                         if (m->unusual && (m->error || m->restart || m->private
2271                             || m->absent || (fault_type & m->page_lock))) {
2272
2273                                 /*
2274                                  *      Unusual case. Give up.
2275                                  */
2276                                 break;
2277                         }
2278
2279                         /*
2280                          *      Two cases of map in faults:
2281                          *          - At top level w/o copy object.
2282                          *          - Read fault anywhere.
2283                          *              --> must disallow write.
2284                          */
2285
2286                         if (object == cur_object &&
2287                             object->copy == VM_OBJECT_NULL)
2288                                 goto FastMapInFault;
2289
2290                         if ((fault_type & VM_PROT_WRITE) == 0) {
2291                                 boolean_t sequential;
2292
2293                                 prot &= ~VM_PROT_WRITE;
2294
2295                                 /*
2296                                  *      Set up to map the page ...
2297                                  *      mark the page busy, drop
2298                                  *      locks and take a paging reference
2299                                  *      on the object with the page.
2300                                  */
2301
2302                                 if (object != cur_object) {
2303                                         vm_object_unlock(object);
2304                                         object = cur_object;
2305                                 }
2306 FastMapInFault:
2307                                 m->busy = TRUE;
2308
2309                                 vm_object_paging_begin(object);
2310
2311 FastPmapEnter:
2312                                 /*
2313                                  *      Check a couple of global reasons to
2314                                  *      be conservative about write access.
2315                                  *      Then do the pmap_enter.
2316                                  */
2317 #if     !VM_FAULT_STATIC_CONFIG
2318                                 if (vm_fault_dirty_handling
2319 #if     MACH_KDB
2320                                     || db_watchpoint_list
2321 #endif
2322                                     && (fault_type & VM_PROT_WRITE) == 0)
2323                                         prot &= ~VM_PROT_WRITE;
2324 #else   /* STATIC_CONFIG */
2325 #if     MACH_KDB
2326                                 if (db_watchpoint_list
2327                                     && (fault_type & VM_PROT_WRITE) == 0)
2328                                         prot &= ~VM_PROT_WRITE;
2329 #endif  /* MACH_KDB */
2330 #endif  /* STATIC_CONFIG */
2331                                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2332
2333                                 sequential = FALSE;
2334                                 if (m->no_isync == TRUE) {
2335                                         m->no_isync = FALSE;
2336                                         pmap_sync_caches_phys(m->phys_page);
2337                                         if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2338                                                 /*
2339                                                  * found it in the cache, but this
2340                                                  * is the first fault-in of the page (no_isync == TRUE)
2341                                                  * so it must have come in as part of
2342                                                  * a cluster... account 1 pagein against it
2343                                                  */
2344                                                 VM_STAT(pageins++);
2345                                                 current_task()->pageins++;
2346                                                 type_of_fault = DBG_PAGEIN_FAULT;
2347                                                 sequential = TRUE;
2348                                         }
2349                                 } else if (cache_attr != VM_WIMG_DEFAULT) {
2350                                         pmap_sync_caches_phys(m->phys_page);
2351                                 }
2352
2353                                 if(caller_pmap) {
2354                                         PMAP_ENTER(caller_pmap,
2355                                                 caller_pmap_addr, m,
2356                                                 prot, cache_attr, wired);
2357                                 } else {
2358                                         PMAP_ENTER(pmap, vaddr, m,
2359                                                 prot, cache_attr, wired);
2360                                 }
2361
2362                                 /*
2363                                  *      Hold queues lock to manipulate
2364                                  *      the page queues.  Change wiring
2365                                  *      case is obvious.  In soft ref bits
2366                                  *      case activate page only if it fell
2367                                  *      off paging queues, otherwise just
2368                                  *      activate it if it's inactive.
2369                                  *
2370                                  *      NOTE: original vm_fault code will
2371                                  *      move active page to back of active
2372                                  *      queue.  This code doesn't.
2373                                  */
2374                                 vm_page_lock_queues();
2375                                 if (m->clustered) {
2376                                         vm_pagein_cluster_used++;
2377                                         m->clustered = FALSE;
2378                                 }
2379                                 m->reference = TRUE;
2380
2381                                 if (change_wiring) {
2382                                         if (wired)
2383                                                 vm_page_wire(m);
2384                                         else
2385                                                 vm_page_unwire(m);
2386                                 }
2387 #if VM_FAULT_STATIC_CONFIG
2388                                 else {
2389                                         if (!m->active && !m->inactive)
2390                                                 vm_page_activate(m);
2391                                 }
2392 #else
2393                                 else if (software_reference_bits) {
2394                                         if (!m->active && !m->inactive)
2395                                                 vm_page_activate(m);
2396                                 }
2397                                 else if (!m->active) {
2398                                         vm_page_activate(m);
2399                                 }
2400 #endif
2401                                 vm_page_unlock_queues();
2402
2403                                 /*
2404                                  *      That's it, clean up and return.
2405                                  */
2406                                 PAGE_WAKEUP_DONE(m);
2407
2408                                 sequential = (sequential && vm_page_deactivate_behind) ?
2409                                         vm_fault_deactivate_behind(object, cur_offset, behavior) :
2410                                         FALSE;
2411
2412                                 /*
2413                                  * Add non-sequential pages to the working set.
2414                                  * The sequential pages will be brought in through
2415                                  * normal clustering behavior.
2416                                  */
2417                                 if (!sequential && !object->private) {
2418                                         write_startup_file =
2419                                                 vm_fault_tws_insert(map, pmap_map, vaddr,
2420                                                                 object, cur_offset);
2421                                 }
2422
2423                                 vm_object_paging_end(object);
2424                                 vm_object_unlock(object);
2425
2426                                 vm_map_unlock_read(map);
2427                                 if(pmap_map != map)
2428                                         vm_map_unlock(pmap_map);
2429
2430                                 if(write_startup_file)
2431                                         tws_send_startup_info(current_task());
2432
2433                                 if (funnel_set)
2434                                         thread_funnel_set( curflock, TRUE);
2435
2436                                 thread_interrupt_level(interruptible_state);
2437
2438
2439                                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2440                                                       vaddr,
2441                                                       type_of_fault & 0xff,
2442                                                       KERN_SUCCESS,
2443                                                       type_of_fault >> 8,
2444                                                       0);
2445
2446                                 return KERN_SUCCESS;
2447                         }
2448
2449                         /*
2450                          *      Copy on write fault.  If objects match, then
2451                          *      object->copy must not be NULL (else control
2452                          *      would be in previous code block), and we
2453                          *      have a potential push into the copy object
2454                          *      with which we won't cope here.
2455                          */
2456
2457                         if (cur_object == object)
2458                                 break;
2459                         /*
2460                          *      This is now a shadow based copy on write
2461                          *      fault -- it requires a copy up the shadow
2462                          *      chain.
2463                          *
2464                          *      Allocate a page in the original top level
2465                          *      object. Give up if allocate fails.  Also
2466                          *      need to remember current page, as it's the
2467                          *      source of the copy.
2468                          */
2469                         cur_m = m;
2470                         m = vm_page_grab();
2471                         if (m == VM_PAGE_NULL) {
2472                                 break;
2473                         }
2474                         /*
2475                          *      Now do the copy.  Mark the source busy
2476                          *      and take out paging references on both
2477                          *      objects.
2478                          *
2479                          *      NOTE: This code holds the map lock across
2480                          *      the page copy.
2481                          */
2482
2483                         cur_m->busy = TRUE;
2484                         vm_page_copy(cur_m, m);
2485                         vm_page_insert(m, object, offset);
2486
2487                         vm_object_paging_begin(cur_object);
2488                         vm_object_paging_begin(object);
2489
2490                         type_of_fault = DBG_COW_FAULT;
2491                         VM_STAT(cow_faults++);
2492                         current_task()->cow_faults++;
2493
2494                         /*
2495                          *      Now cope with the source page and object
2496                          *      If the top object has a ref count of 1
2497                          *      then no other map can access it, and hence
2498                          *      it's not necessary to do the pmap_page_protect.
2499                          */
2500
2501
2502                         vm_page_lock_queues();
2503                         vm_page_deactivate(cur_m);
2504                         m->dirty = TRUE;
2505                         pmap_page_protect(cur_m->phys_page,
2506                                                   VM_PROT_NONE);
2507                         vm_page_unlock_queues();
2508
2509                         PAGE_WAKEUP_DONE(cur_m);
2510                         vm_object_paging_end(cur_object);
2511                         vm_object_unlock(cur_object);
2512
2513                         /*
2514                          *      Slight hack to call vm_object collapse
2515                          *      and then reuse common map in code.
2516                          *      note that the object lock was taken above.
2517                          */
2518
2519                         vm_object_paging_end(object);
2520                         vm_object_collapse(object, offset);
2521                         vm_object_paging_begin(object);
2522
2523                         goto FastPmapEnter;
2524                 }
2525                 else {
2526
2527                         /*
2528                          *      No page at cur_object, cur_offset
2529                          */
2530
2531                         if (cur_object->pager_created) {
2532
2533                                 /*
2534                                  *      Have to talk to the pager.  Give up.
2535                                  */
2536                                 break;
2537                         }
2538
2539
2540                         if (cur_object->shadow == VM_OBJECT_NULL) {
2541
2542                                 if (cur_object->shadow_severed) {
2543                                         vm_object_paging_end(object);
2544                                         vm_object_unlock(object);
2545                                         vm_map_unlock_read(map);
2546                                         if(pmap_map != map)
2547                                                 vm_map_unlock(pmap_map);
2548
2549                                         if(write_startup_file)
2550                                                 tws_send_startup_info(
2551                                                                 current_task());
2552
2553                                         if (funnel_set) {
2554                                                 thread_funnel_set( curflock, TRUE);
2555                                                 funnel_set = FALSE;
2556                                         }
2557                                         thread_interrupt_level(interruptible_state);
2558
2559                                         return VM_FAULT_MEMORY_ERROR;
2560                                 }
2561
2562                                 /*
2563                                  *      Zero fill fault.  Page gets
2564                                  *      filled in top object. Insert
2565                                  *      page, then drop any lower lock.
2566                                  *      Give up if no page.
2567                                  */
2568                                 if (VM_PAGE_THROTTLED()) {
2569                                         break;
2570                                 }
2571
2572                                 /*
2573                                  * are we protecting the system from
2574                                  * backing store exhaustion.  If so
2575                                  * sleep unless we are privileged.
2576                                  */
2577                                 if(vm_backing_store_low) {
2578                                         if(!(current_task()->priv_flags
2579                                                 & VM_BACKING_STORE_PRIV))
2580                                         break;
2581                                 }
2582                                 m = vm_page_alloc(object, offset);
2583                                 if (m == VM_PAGE_NULL) {
2584                                         break;
2585                                 }
2586                                 /*
2587                                  * This is a zero-fill or initial fill
2588                                  * page fault.  As such, we consider it
2589                                  * undefined with respect to instruction
2590                                  * execution.  i.e. it is the responsibility
2591                                  * of higher layers to call for an instruction
2592                                  * sync after changing the contents and before
2593                                  * sending a program into this area.  We
2594                                  * choose this approach for performance
2595                                  */
2596
2597                                 m->no_isync = FALSE;
2598
2599                                 if (cur_object != object)
2600                                         vm_object_unlock(cur_object);
2601
2602                                 vm_object_paging_begin(object);
2603                                 vm_object_unlock(object);
2604
2605                                 /*
2606                                  *      Now zero fill page and map it.
2607                                  *      the page is probably going to
2608                                  *      be written soon, so don't bother
2609                                  *      to clear the modified bit
2610                                  *
2611                                  *      NOTE: This code holds the map
2612                                  *      lock across the zero fill.
2613                                  */
2614
2615                                 if (!map->no_zero_fill) {
2616                                         vm_page_zero_fill(m);
2617                                         type_of_fault = DBG_ZERO_FILL_FAULT;
2618                                         VM_STAT(zero_fill_count++);
2619                                 }
2620                                 vm_page_lock_queues();
2621                                 VM_PAGE_QUEUES_REMOVE(m);
2622
2623                                 m->page_ticket = vm_page_ticket;
2624                                 if(m->object->size > 0x80000) {
2625                                         m->zero_fill = TRUE;
2626                                         /* depends on the queues lock */
2627                                         vm_zf_count += 1;
2628                                         queue_enter(&vm_page_queue_zf,
2629                                                 m, vm_page_t, pageq);
2630                                 } else {
2631                                         queue_enter(
2632                                                 &vm_page_queue_inactive,
2633                                                 m, vm_page_t, pageq);
2634                                 }
2635                                 vm_page_ticket_roll++;
2636                                 if(vm_page_ticket_roll ==
2637                                                 VM_PAGE_TICKETS_IN_ROLL) {
2638                                         vm_page_ticket_roll = 0;
2639                                         if(vm_page_ticket ==
2640                                                 VM_PAGE_TICKET_ROLL_IDS)
2641                                                 vm_page_ticket= 0;
2642                                         else
2643                                                 vm_page_ticket++;
2644                                 }
2645
2646                                 m->inactive = TRUE;
2647                                 vm_page_inactive_count++;
2648                                 vm_page_unlock_queues();
2649                                 vm_object_lock(object);
2650
2651                                 goto FastPmapEnter;
2652                         }
2653
2654                         /*
2655                          *      On to the next level
2656                          */
2657
2658                         cur_offset += cur_object->shadow_offset;
2659                         new_object = cur_object->shadow;
2660                         vm_object_lock(new_object);
2661                         if (cur_object != object)
2662                                 vm_object_unlock(cur_object);
2663                         cur_object = new_object;
2664
2665                         continue;
2666                 }
2667         }
2668
2669         /*
2670          *      Cleanup from fast fault failure.  Drop any object
2671          *      lock other than original and drop map lock.
2672          */
2673
2674         if (object != cur_object)
2675                 vm_object_unlock(cur_object);
2676         }
2677         vm_map_unlock_read(map);
2678
2679         if(pmap_map != map)
2680                 vm_map_unlock(pmap_map);
2681
2682         /*
2683          *      Make a reference to this object to
2684          *      prevent its disposal while we are messing with
2685          *      it.  Once we have the reference, the map is free
2686          *      to be diddled.  Since objects reference their
2687          *      shadows (and copies), they will stay around as well.
2688          */
2689
2690         assert(object->ref_count > 0);
2691         object->ref_count++;
2692         vm_object_res_reference(object);
2693         vm_object_paging_begin(object);
2694
2695         XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2696
2697         if (!object->private) {
2698                 write_startup_file =
2699                         vm_fault_tws_insert(map, pmap_map, vaddr, object, offset);
2700         }
2701
2702         kr = vm_fault_page(object, offset, fault_type,
2703                            (change_wiring && !wired),
2704                            interruptible,
2705                            lo_offset, hi_offset, behavior,
2706                            &prot, &result_page, &top_page,
2707                            &type_of_fault,
2708                            &error_code, map->no_zero_fill, FALSE, map, vaddr);
2709
2710         /*
2711          *      If we didn't succeed, lose the object reference immediately.
2712          */
2713
2714         if (kr != VM_FAULT_SUCCESS)
2715                 vm_object_deallocate(object);
2716
2717         /*
2718          *      See why we failed, and take corrective action.
2719          */
2720
2721         switch (kr) {
2722                 case VM_FAULT_SUCCESS:
2723                         break;
2724                 case VM_FAULT_MEMORY_SHORTAGE:
2725                         if (vm_page_wait((change_wiring) ?
2726                                          THREAD_UNINT :
2727                                          THREAD_ABORTSAFE))
2728                                 goto RetryFault;
2729                         /* fall thru */
2730                 case VM_FAULT_INTERRUPTED:
2731                         kr = KERN_ABORTED;
2732                         goto done;
2733                 case VM_FAULT_RETRY:
2734                         goto RetryFault;
2735                 case VM_FAULT_FICTITIOUS_SHORTAGE:
2736                         vm_page_more_fictitious();
2737                         goto RetryFault;
2738                 case VM_FAULT_MEMORY_ERROR:
2739                         if (error_code)
2740                                 kr = error_code;
2741                         else
2742                                 kr = KERN_MEMORY_ERROR;
2743                         goto done;
2744         }
2745
2746         m = result_page;
2747
2748         if(m != VM_PAGE_NULL) {
2749                 assert((change_wiring && !wired) ?
2750                     (top_page == VM_PAGE_NULL) :
2751                     ((top_page == VM_PAGE_NULL) == (m->object == object)));
2752         }
2753
2754         /*
2755          *      How to clean up the result of vm_fault_page.  This
2756          *      happens whether the mapping is entered or not.
2757          */
2758
2759 #define UNLOCK_AND_DEALLOCATE                           \
2760         MACRO_BEGIN                                     \
2761         vm_fault_cleanup(m->object, top_page);          \
2762         vm_object_deallocate(object);                   \
2763         MACRO_END
2764
2765         /*
2766          *      What to do with the resulting page from vm_fault_page
2767          *      if it doesn't get entered into the physical map:
2768          */
2769
2770 #define RELEASE_PAGE(m)                                 \
2771         MACRO_BEGIN                                     \
2772         PAGE_WAKEUP_DONE(m);                            \
2773         vm_page_lock_queues();                          \
2774         if (!m->active && !m->inactive)                 \
2775                 vm_page_activate(m);                    \
2776         vm_page_unlock_queues();                        \
2777         MACRO_END
2778
2779         /*
2780          *      We must verify that the maps have not changed
2781          *      since our last lookup.
2782          */
2783
2784         if(m != VM_PAGE_NULL) {
2785                 old_copy_object = m->object->copy;
2786                 vm_object_unlock(m->object);
2787         } else {
2788                 old_copy_object = VM_OBJECT_NULL;
2789         }
2790         if ((map != original_map) || !vm_map_verify(map, &version)) {
2791                 vm_object_t             retry_object;
2792                 vm_object_offset_t      retry_offset;
2793                 vm_prot_t               retry_prot;
2794
2795                 /*
2796                  *      To avoid trying to write_lock the map while another
2797                  *      thread has it read_locked (in vm_map_pageable), we
2798                  *      do not try for write permission.  If the page is
2799                  *      still writable, we will get write permission.  If it
2800                  *      is not, or has been marked needs_copy, we enter the
2801                  *      mapping without write permission, and will merely
2802                  *      take another fault.
2803                  */
2804                 map = original_map;
2805                 vm_map_lock_read(map);
2806                 kr = vm_map_lookup_locked(&map, vaddr,
2807                                    fault_type & ~VM_PROT_WRITE, &version,
2808                                    &retry_object, &retry_offset, &retry_prot,
2809                                    &wired, &behavior, &lo_offset, &hi_offset,
2810                                    &pmap_map);
2811                 pmap = pmap_map->pmap;
2812
2813                 if (kr != KERN_SUCCESS) {
2814                         vm_map_unlock_read(map);
2815                         if(m != VM_PAGE_NULL) {
2816                                 vm_object_lock(m->object);
2817                                 RELEASE_PAGE(m);
2818                                 UNLOCK_AND_DEALLOCATE;
2819                         } else {
2820                                 vm_object_deallocate(object);
2821                         }
2822                         goto done;
2823                 }
2824
2825                 vm_object_unlock(retry_object);
2826                 if(m != VM_PAGE_NULL) {
2827                         vm_object_lock(m->object);
2828                 } else {
2829                         vm_object_lock(object);
2830                 }
2831
2832                 if ((retry_object != object) ||
2833                     (retry_offset != offset)) {
2834                         vm_map_unlock_read(map);
2835                         if(pmap_map != map)
2836                                 vm_map_unlock(pmap_map);
2837                         if(m != VM_PAGE_NULL) {
2838                                 RELEASE_PAGE(m);
2839                                 UNLOCK_AND_DEALLOCATE;
2840                         } else {
2841                                 vm_object_deallocate(object);
2842                         }
2843                         goto RetryFault;
2844                 }
2845
2846                 /*
2847                  *      Check whether the protection has changed or the object
2848                  *      has been copied while we left the map unlocked.
2849                  */
2850                 prot &= retry_prot;
2851                 if(m != VM_PAGE_NULL) {
2852                         vm_object_unlock(m->object);
2853                 } else {
2854                         vm_object_unlock(object);
2855                 }
2856         }
2857         if(m != VM_PAGE_NULL) {
2858                 vm_object_lock(m->object);
2859         } else {
2860                 vm_object_lock(object);
2861         }
2862
2863         /*
2864          *      If the copy object changed while the top-level object
2865          *      was unlocked, then we must take away write permission.
2866          */
2867
2868         if(m != VM_PAGE_NULL) {
2869                 if (m->object->copy != old_copy_object)
2870                         prot &= ~VM_PROT_WRITE;
2871         }
2872
2873         /*
2874          *      If we want to wire down this page, but no longer have
2875          *      adequate permissions, we must start all over.
2876          */
2877
2878         if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2879                 vm_map_verify_done(map, &version);
2880                 if(pmap_map != map)
2881                         vm_map_unlock(pmap_map);
2882                 if(m != VM_PAGE_NULL) {
2883                         RELEASE_PAGE(m);
2884                         UNLOCK_AND_DEALLOCATE;
2885                 } else {
2886                         vm_object_deallocate(object);
2887                 }
2888                 goto RetryFault;
2889         }
2890
2891         /*
2892          *      Put this page into the physical map.
2893          *      We had to do the unlock above because pmap_enter
2894          *      may cause other faults.  The page may be on
2895          *      the pageout queues.  If the pageout daemon comes
2896          *      across the page, it will remove it from the queues.
2897          */
2898         if (m != VM_PAGE_NULL) {
2899                 if (m->no_isync == TRUE) {
2900                         pmap_sync_caches_phys(m->phys_page);
2901
2902                         if (type_of_fault == DBG_CACHE_HIT_FAULT) {
2903                                 /*
2904                                  * found it in the cache, but this
2905                                  * is the first fault-in of the page (no_isync == TRUE)
2906                                  * so it must have come in as part of
2907                                  * a cluster... account 1 pagein against it
2908                                  */
2909                                  VM_STAT(pageins++);
2910                                  current_task()->pageins++;
2911
2912                                  type_of_fault = DBG_PAGEIN_FAULT;
2913                         }
2914                         m->no_isync = FALSE;
2915                 }
2916                 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2917
2918                 if(caller_pmap) {
2919                         PMAP_ENTER(caller_pmap,
2920                                         caller_pmap_addr, m,
2921                                         prot, cache_attr, wired);
2922                 } else {
2923                         PMAP_ENTER(pmap, vaddr, m,
2924                                         prot, cache_attr, wired);
2925                 }
2926
2927                 /*
2928                  * Add working set information for private objects here.
2929                  */
2930                 if (m->object->private) {
2931                         write_startup_file =
2932                                 vm_fault_tws_insert(map, pmap_map, vaddr,
2933                                             m->object, m->offset);
2934                 }
2935         } else {
2936
2937 #ifndef i386
2938                 int                     memattr;
2939                 vm_map_entry_t          entry;
2940                 vm_offset_t             laddr;
2941                 vm_offset_t             ldelta, hdelta;
2942
2943                 /*
2944                  * do a pmap block mapping from the physical address
2945                  * in the object
2946                  */
2947
2948                 /* While we do not worry about execution protection in   */
2949                 /* general, certian pages may have instruction execution */
2950                 /* disallowed.  We will check here, and if not allowed   */
2951                 /* to execute, we return with a protection failure.      */
2952
2953                 if((full_fault_type & VM_PROT_EXECUTE) &&
2954                         (pmap_canExecute((ppnum_t)
2955                                 (object->shadow_offset >> 12)) < 1)) {
2956
2957                         vm_map_verify_done(map, &version);
2958                         if(pmap_map != map)
2959                                 vm_map_unlock(pmap_map);
2960                         vm_fault_cleanup(object, top_page);
2961                         vm_object_deallocate(object);
2962                         kr = KERN_PROTECTION_FAILURE;
2963                         goto done;
2964                 }
2965
2966                 if(pmap_map != map) {
2967                         vm_map_unlock(pmap_map);
2968                 }
2969                 if (original_map != map) {
2970                         vm_map_unlock_read(map);
2971                         vm_map_lock_read(original_map);
2972                         map = original_map;
2973                 }
2974                 pmap_map = map;
2975
2976                 laddr = vaddr;
2977                 hdelta = 0xFFFFF000;
2978                 ldelta = 0xFFFFF000;
2979
2980
2981                 while(vm_map_lookup_entry(map, laddr, &entry)) {
2982                         if(ldelta > (laddr - entry->vme_start))
2983                                 ldelta = laddr - entry->vme_start;
2984                         if(hdelta > (entry->vme_end - laddr))
2985                                 hdelta = entry->vme_end - laddr;
2986                         if(entry->is_sub_map) {
2987
2988                                 laddr = (laddr - entry->vme_start)
2989                                                         + entry->offset;
2990                                 vm_map_lock_read(entry->object.sub_map);
2991                                 if(map != pmap_map)
2992                                         vm_map_unlock_read(map);
2993                                 if(entry->use_pmap) {
2994                                         vm_map_unlock_read(pmap_map);
2995                                         pmap_map = entry->object.sub_map;
2996                                 }
2997                                 map = entry->object.sub_map;
2998
2999                         } else {
3000                                 break;
3001                         }
3002                 }
3003
3004                 if(vm_map_lookup_entry(map, laddr, &entry) &&
3005                                         (entry->object.vm_object != NULL) &&
3006                                         (entry->object.vm_object == object)) {
3007
3008
3009                         if(caller_pmap) {
3010                                 /* Set up a block mapped area */
3011                                 pmap_map_block(caller_pmap,
3012                                         (addr64_t)(caller_pmap_addr - ldelta),
3013                                         (((vm_offset_t)
3014                                     (entry->object.vm_object->shadow_offset))
3015                                         + entry->offset +
3016                                         (laddr - entry->vme_start)
3017                                                         - ldelta)>>12,
3018                                 ldelta + hdelta, prot,
3019                                 (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3020                         } else {
3021                                 /* Set up a block mapped area */
3022                                 pmap_map_block(pmap_map->pmap,
3023                                    (addr64_t)(vaddr - ldelta),
3024                                    (((vm_offset_t)
3025                                     (entry->object.vm_object->shadow_offset))
3026                                        + entry->offset +
3027                                        (laddr - entry->vme_start) - ldelta)>>12,
3028                                    ldelta + hdelta, prot,
3029                                    (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3030                         }
3031                 }
3032 #else
3033 #ifdef notyet
3034                 if(caller_pmap) {
3035                         pmap_enter(caller_pmap, caller_pmap_addr,
3036                                 object->shadow_offset>>12, prot, 0, TRUE);
3037                 } else {
3038                         pmap_enter(pmap, vaddr,
3039                                 object->shadow_offset>>12, prot, 0, TRUE);
3040                 }
3041                         /* Map it in */
3042 #endif
3043 #endif
3044
3045         }
3046
3047         /*
3048          *      If the page is not wired down and isn't already
3049          *      on a pageout queue, then put it where the
3050          *      pageout daemon can find it.
3051          */
3052         if(m != VM_PAGE_NULL) {
3053                 vm_page_lock_queues();
3054
3055                 if (change_wiring) {
3056                         if (wired)
3057                                 vm_page_wire(m);
3058                         else
3059                                 vm_page_unwire(m);
3060                 }
3061 #if     VM_FAULT_STATIC_CONFIG
3062                 else {
3063                         if (!m->active && !m->inactive)
3064                                 vm_page_activate(m);
3065                         m->reference = TRUE;
3066                 }
3067 #else
3068                 else if (software_reference_bits) {
3069                         if (!m->active && !m->inactive)
3070                                 vm_page_activate(m);
3071                         m->reference = TRUE;
3072                 } else {
3073                         vm_page_activate(m);
3074                 }
3075 #endif
3076                 vm_page_unlock_queues();
3077         }
3078
3079         /*
3080          *      Unlock everything, and return
3081          */
3082
3083         vm_map_verify_done(map, &version);
3084         if(pmap_map != map)
3085                 vm_map_unlock(pmap_map);
3086         if(m != VM_PAGE_NULL) {
3087                 PAGE_WAKEUP_DONE(m);
3088                 UNLOCK_AND_DEALLOCATE;
3089         } else {
3090                 vm_fault_cleanup(object, top_page);
3091                 vm_object_deallocate(object);
3092         }
3093         kr = KERN_SUCCESS;
3094
3095 #undef  UNLOCK_AND_DEALLOCATE
3096 #undef  RELEASE_PAGE
3097
3098     done:
3099         if(write_startup_file)
3100                 tws_send_startup_info(current_task());
3101         if (funnel_set) {
3102                 thread_funnel_set( curflock, TRUE);
3103                 funnel_set = FALSE;
3104         }
3105         thread_interrupt_level(interruptible_state);
3106
3107         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
3108                               vaddr,
3109                               type_of_fault & 0xff,
3110                               kr,
3111                               type_of_fault >> 8,
3112                               0);
3113
3114         return(kr);
3115 }
3116
3117 /*
3118  *      vm_fault_wire:
3119  *
3120  *      Wire down a range of virtual addresses in a map.
3121  */
3122 kern_return_t
3123 vm_fault_wire(
3124         vm_map_t        map,
3125         vm_map_entry_t  entry,
3126         pmap_t          pmap,
3127         vm_offset_t     pmap_addr)
3128 {
3129
3130         register vm_offset_t    va;
3131         register vm_offset_t    end_addr = entry->vme_end;
3132         register kern_return_t  rc;
3133
3134         assert(entry->in_transition);
3135
3136         if ((entry->object.vm_object != NULL) &&
3137                         !entry->is_sub_map &&
3138                         entry->object.vm_object->phys_contiguous) {
3139                 return KERN_SUCCESS;
3140         }
3141
3142         /*
3143          *      Inform the physical mapping system that the
3144          *      range of addresses may not fault, so that
3145          *      page tables and such can be locked down as well.
3146          */
3147
3148         pmap_pageable(pmap, pmap_addr,
3149                 pmap_addr + (end_addr - entry->vme_start), FALSE);
3150
3151         /*
3152          *      We simulate a fault to get the page and enter it
3153          *      in the physical map.
3154          */
3155
3156         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3157                 if ((rc = vm_fault_wire_fast(
3158                         map, va, entry, pmap,
3159                         pmap_addr + (va - entry->vme_start)
3160                         )) != KERN_SUCCESS) {
3161                         rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3162                                 (pmap == kernel_pmap) ?
3163                                         THREAD_UNINT : THREAD_ABORTSAFE,
3164                                 pmap, pmap_addr + (va - entry->vme_start));
3165                 }
3166
3167                 if (rc != KERN_SUCCESS) {
3168                         struct vm_map_entry     tmp_entry = *entry;
3169
3170                         /* unwire wired pages */
3171                         tmp_entry.vme_end = va;
3172                         vm_fault_unwire(map,
3173                                 &tmp_entry, FALSE, pmap, pmap_addr);
3174
3175                         return rc;
3176                 }
3177         }
3178         return KERN_SUCCESS;
3179 }
3180
3181 /*
3182  *      vm_fault_unwire:
3183  *
3184  *      Unwire a range of virtual addresses in a map.
3185  */
3186 void
3187 vm_fault_unwire(
3188         vm_map_t        map,
3189         vm_map_entry_t  entry,
3190         boolean_t       deallocate,
3191         pmap_t          pmap,
3192         vm_offset_t     pmap_addr)
3193 {
3194         register vm_offset_t    va;
3195         register vm_offset_t    end_addr = entry->vme_end;
3196         vm_object_t             object;
3197
3198         object = (entry->is_sub_map)
3199                         ? VM_OBJECT_NULL : entry->object.vm_object;
3200
3201         /*
3202          *      Since the pages are wired down, we must be able to
3203          *      get their mappings from the physical map system.
3204          */
3205
3206         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3207                 pmap_change_wiring(pmap,
3208                         pmap_addr + (va - entry->vme_start), FALSE);
3209
3210                 if (object == VM_OBJECT_NULL) {
3211                         (void) vm_fault(map, va, VM_PROT_NONE,
3212                                         TRUE, THREAD_UNINT, pmap, pmap_addr);
3213                 } else if (object->phys_contiguous) {
3214                         continue;
3215                 } else {
3216                         vm_prot_t       prot;
3217                         vm_page_t       result_page;
3218                         vm_page_t       top_page;
3219                         vm_object_t     result_object;
3220                         vm_fault_return_t result;
3221
3222                         do {
3223                                 prot = VM_PROT_NONE;
3224
3225                                 vm_object_lock(object);
3226                                 vm_object_paging_begin(object);
3227                                 XPR(XPR_VM_FAULT,
3228                                         "vm_fault_unwire -> vm_fault_page\n",
3229                                         0,0,0,0,0);
3230                                 result = vm_fault_page(object,
3231                                                 entry->offset +
3232                                                   (va - entry->vme_start),
3233                                                 VM_PROT_NONE, TRUE,
3234                                                 THREAD_UNINT,
3235                                                 entry->offset,
3236                                                 entry->offset +
3237                                                        (entry->vme_end
3238                                                         - entry->vme_start),
3239                                                 entry->behavior,
3240                                                 &prot,
3241                                                 &result_page,
3242                                                 &top_page,
3243                                                 (int *)0,
3244                                                 0, map->no_zero_fill,
3245                                                 FALSE, NULL, 0);
3246                         } while (result == VM_FAULT_RETRY);
3247
3248                         if (result != VM_FAULT_SUCCESS)
3249                                 panic("vm_fault_unwire: failure");
3250
3251                         result_object = result_page->object;
3252                         if (deallocate) {
3253                                 assert(!result_page->fictitious);
3254                                 pmap_page_protect(result_page->phys_page,
3255                                                 VM_PROT_NONE);
3256                                 VM_PAGE_FREE(result_page);
3257                         } else {
3258                                 vm_page_lock_queues();
3259                                 vm_page_unwire(result_page);
3260                                 vm_page_unlock_queues();
3261                                 PAGE_WAKEUP_DONE(result_page);
3262                         }
3263
3264                         vm_fault_cleanup(result_object, top_page);
3265                 }
3266         }
3267
3268         /*
3269          *      Inform the physical mapping system that the range
3270          *      of addresses may fault, so that page tables and
3271          *      such may be unwired themselves.
3272          */
3273
3274         pmap_pageable(pmap, pmap_addr,
3275                 pmap_addr + (end_addr - entry->vme_start), TRUE);
3276
3277 }
3278
3279 /*
3280  *      vm_fault_wire_fast:
3281  *
3282  *      Handle common case of a wire down page fault at the given address.
3283  *      If successful, the page is inserted into the associated physical map.
3284  *      The map entry is passed in to avoid the overhead of a map lookup.
3285  *
3286  *      NOTE: the given address should be truncated to the
3287  *      proper page address.
3288  *
3289  *      KERN_SUCCESS is returned if the page fault is handled; otherwise,
3290  *      a standard error specifying why the fault is fatal is returned.
3291  *
3292  *      The map in question must be referenced, and remains so.
3293  *      Caller has a read lock on the map.
3294  *
3295  *      This is a stripped version of vm_fault() for wiring pages.  Anything
3296  *      other than the common case will return KERN_FAILURE, and the caller
3297  *      is expected to call vm_fault().
3298  */
3299 kern_return_t
3300 vm_fault_wire_fast(
3301         vm_map_t        map,
3302         vm_offset_t     va,
3303         vm_map_entry_t  entry,
3304         pmap_t          pmap,
3305         vm_offset_t     pmap_addr)
3306 {
3307         vm_object_t             object;
3308         vm_object_offset_t      offset;
3309         register vm_page_t      m;
3310         vm_prot_t               prot;
3311         thread_act_t            thr_act;
3312         unsigned int            cache_attr;
3313
3314         VM_STAT(faults++);
3315
3316         if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
3317           thr_act->task->faults++;
3318
3319 /*
3320  *      Recovery actions
3321  */
3322
3323 #undef  RELEASE_PAGE
3324 #define RELEASE_PAGE(m) {                               \
3325         PAGE_WAKEUP_DONE(m);                            \
3326         vm_page_lock_queues();                          \
3327         vm_page_unwire(m);                              \
3328         vm_page_unlock_queues();                        \
3329 }
3330
3331
3332 #undef  UNLOCK_THINGS
3333 #define UNLOCK_THINGS   {                               \
3334         object->paging_in_progress--;                   \
3335         vm_object_unlock(object);                       \
3336 }
3337
3338 #undef  UNLOCK_AND_DEALLOCATE
3339 #define UNLOCK_AND_DEALLOCATE   {                       \
3340         UNLOCK_THINGS;                                  \
3341         vm_object_deallocate(object);                   \
3342 }
3343 /*
3344  *      Give up and have caller do things the hard way.
3345  */
3346
3347 #define GIVE_UP {                                       \
3348         UNLOCK_AND_DEALLOCATE;                          \
3349         return(KERN_FAILURE);                           \
3350 }
3351
3352
3353         /*
3354          *      If this entry is not directly to a vm_object, bail out.
3355          */
3356         if (entry->is_sub_map)
3357                 return(KERN_FAILURE);
3358
3359         /*
3360          *      Find the backing store object and offset into it.
3361          */
3362
3363         object = entry->object.vm_object;
3364         offset = (va - entry->vme_start) + entry->offset;
3365         prot = entry->protection;
3366
3367         /*
3368          *      Make a reference to this object to prevent its
3369          *      disposal while we are messing with it.
3370          */
3371
3372         vm_object_lock(object);
3373         assert(object->ref_count > 0);
3374         object->ref_count++;
3375         vm_object_res_reference(object);
3376         object->paging_in_progress++;
3377
3378         /*
3379          *      INVARIANTS (through entire routine):
3380          *
3381          *      1)      At all times, we must either have the object
3382          *              lock or a busy page in some object to prevent
3383          *              some other thread from trying to bring in
3384          *              the same page.
3385          *
3386          *      2)      Once we have a busy page, we must remove it from
3387          *              the pageout queues, so that the pageout daemon
3388          *              will not grab it away.
3389          *
3390          */
3391
3392         /*
3393          *      Look for page in top-level object.  If it's not there or
3394          *      there's something going on, give up.
3395          */
3396         m = vm_page_lookup(object, offset);
3397         if ((m == VM_PAGE_NULL) || (m->busy) ||
3398             (m->unusual && ( m->error || m->restart || m->absent ||
3399                                 prot & m->page_lock))) {
3400
3401                 GIVE_UP;
3402         }
3403
3404         /*
3405          *      Wire the page down now.  All bail outs beyond this
3406          *      point must unwire the page.
3407          */
3408
3409         vm_page_lock_queues();
3410         vm_page_wire(m);
3411         vm_page_unlock_queues();
3412
3413         /*
3414          *      Mark page busy for other threads.
3415          */
3416         assert(!m->busy);
3417         m->busy = TRUE;
3418         assert(!m->absent);
3419
3420         /*
3421          *      Give up if the page is being written and there's a copy object
3422          */
3423         if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3424                 RELEASE_PAGE(m);
3425                 GIVE_UP;
3426         }
3427
3428         /*
3429          *      Put this page into the physical map.
3430          *      We have to unlock the object because pmap_enter
3431          *      may cause other faults.
3432          */
3433         if (m->no_isync == TRUE) {
3434                 pmap_sync_caches_phys(m->phys_page);
3435
3436                 m->no_isync = FALSE;
3437         }
3438
3439         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3440
3441         PMAP_ENTER(pmap, pmap_addr, m, prot, cache_attr, TRUE);
3442
3443         /*
3444          *      Unlock everything, and return
3445          */
3446
3447         PAGE_WAKEUP_DONE(m);
3448         UNLOCK_AND_DEALLOCATE;
3449
3450         return(KERN_SUCCESS);
3451
3452 }
3453
3454 /*
3455  *      Routine:        vm_fault_copy_cleanup
3456  *      Purpose:
3457  *              Release a page used by vm_fault_copy.
3458  */
3459
3460 void
3461 vm_fault_copy_cleanup(
3462         vm_page_t       page,
3463         vm_page_t       top_page)
3464 {
3465         vm_object_t     object = page->object;
3466
3467         vm_object_lock(object);
3468         PAGE_WAKEUP_DONE(page);
3469         vm_page_lock_queues();
3470         if (!page->active && !page->inactive)
3471                 vm_page_activate(page);
3472         vm_page_unlock_queues();
3473         vm_fault_cleanup(object, top_page);
3474 }
3475
3476 void
3477 vm_fault_copy_dst_cleanup(
3478         vm_page_t       page)
3479 {
3480         vm_object_t     object;
3481
3482         if (page != VM_PAGE_NULL) {
3483                 object = page->object;
3484                 vm_object_lock(object);
3485                 vm_page_lock_queues();
3486                 vm_page_unwire(page);
3487                 vm_page_unlock_queues();
3488                 vm_object_paging_end(object);
3489                 vm_object_unlock(object);
3490         }
3491 }
3492
3493 /*
3494  *      Routine:        vm_fault_copy
3495  *
3496  *      Purpose:
3497  *              Copy pages from one virtual memory object to another --
3498  *              neither the source nor destination pages need be resident.
3499  *
3500  *              Before actually copying a page, the version associated with
3501  *              the destination address map wil be verified.
3502  *
3503  *      In/out conditions:
3504  *              The caller must hold a reference, but not a lock, to
3505  *              each of the source and destination objects and to the
3506  *              destination map.
3507  *
3508  *      Results:
3509  *              Returns KERN_SUCCESS if no errors were encountered in
3510  *              reading or writing the data.  Returns KERN_INTERRUPTED if
3511  *              the operation was interrupted (only possible if the
3512  *              "interruptible" argument is asserted).  Other return values
3513  *              indicate a permanent error in copying the data.
3514  *
3515  *              The actual amount of data copied will be returned in the
3516  *              "copy_size" argument.  In the event that the destination map
3517  *              verification failed, this amount may be less than the amount
3518  *              requested.
3519  */
3520 kern_return_t
3521 vm_fault_copy(
3522         vm_object_t             src_object,
3523         vm_object_offset_t      src_offset,
3524         vm_size_t               *src_size,              /* INOUT */
3525         vm_object_t             dst_object,
3526         vm_object_offset_t      dst_offset,
3527         vm_map_t                dst_map,
3528         vm_map_version_t         *dst_version,
3529         int                     interruptible)
3530 {
3531         vm_page_t               result_page;
3532
3533         vm_page_t               src_page;
3534         vm_page_t               src_top_page;
3535         vm_prot_t               src_prot;
3536
3537         vm_page_t               dst_page;
3538         vm_page_t               dst_top_page;
3539         vm_prot_t               dst_prot;
3540
3541         vm_size_t               amount_left;
3542         vm_object_t             old_copy_object;
3543         kern_return_t           error = 0;
3544
3545         vm_size_t               part_size;
3546
3547         /*
3548          * In order not to confuse the clustered pageins, align
3549          * the different offsets on a page boundary.
3550          */
3551         vm_object_offset_t      src_lo_offset = trunc_page_64(src_offset);
3552         vm_object_offset_t      dst_lo_offset = trunc_page_64(dst_offset);
3553         vm_object_offset_t      src_hi_offset = round_page_64(src_offset + *src_size);
3554         vm_object_offset_t      dst_hi_offset = round_page_64(dst_offset + *src_size);
3555
3556 #define RETURN(x)                                       \
3557         MACRO_BEGIN                                     \
3558         *src_size -= amount_left;                       \
3559         MACRO_RETURN(x);                                \
3560         MACRO_END
3561
3562         amount_left = *src_size;
3563         do { /* while (amount_left > 0) */
3564                 /*
3565                  * There may be a deadlock if both source and destination
3566                  * pages are the same. To avoid this deadlock, the copy must
3567                  * start by getting the destination page in order to apply
3568                  * COW semantics if any.
3569                  */
3570
3571         RetryDestinationFault: ;
3572
3573                 dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3574
3575                 vm_object_lock(dst_object);
3576                 vm_object_paging_begin(dst_object);
3577
3578                 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3579                 switch (vm_fault_page(dst_object,
3580                                       trunc_page_64(dst_offset),
3581                                       VM_PROT_WRITE|VM_PROT_READ,
3582                                       FALSE,
3583                                       interruptible,
3584                                       dst_lo_offset,
3585                                       dst_hi_offset,
3586                                       VM_BEHAVIOR_SEQUENTIAL,
3587                                       &dst_prot,
3588                                       &dst_page,
3589                                       &dst_top_page,
3590                                       (int *)0,
3591                                       &error,
3592                                       dst_map->no_zero_fill,
3593                                       FALSE, NULL, 0)) {
3594                 case VM_FAULT_SUCCESS:
3595                         break;
3596                 case VM_FAULT_RETRY:
3597                         goto RetryDestinationFault;
3598                 case VM_FAULT_MEMORY_SHORTAGE:
3599                         if (vm_page_wait(interruptible))
3600                                 goto RetryDestinationFault;
3601                         /* fall thru */
3602                 case VM_FAULT_INTERRUPTED:
3603                         RETURN(MACH_SEND_INTERRUPTED);
3604                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3605                         vm_page_more_fictitious();
3606                         goto RetryDestinationFault;
3607                 case VM_FAULT_MEMORY_ERROR:
3608                         if (error)
3609                                 return (error);
3610                         else
3611                                 return(KERN_MEMORY_ERROR);
3612                 }
3613                 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3614
3615                 old_copy_object = dst_page->object->copy;
3616
3617                 /*
3618                  * There exists the possiblity that the source and
3619                  * destination page are the same.  But we can't
3620                  * easily determine that now.  If they are the
3621                  * same, the call to vm_fault_page() for the
3622                  * destination page will deadlock.  To prevent this we
3623                  * wire the page so we can drop busy without having
3624                  * the page daemon steal the page.  We clean up the
3625                  * top page  but keep the paging reference on the object
3626                  * holding the dest page so it doesn't go away.
3627                  */
3628
3629                 vm_page_lock_queues();
3630                 vm_page_wire(dst_page);
3631                 vm_page_unlock_queues();
3632                 PAGE_WAKEUP_DONE(dst_page);
3633                 vm_object_unlock(dst_page->object);
3634
3635                 if (dst_top_page != VM_PAGE_NULL) {
3636                         vm_object_lock(dst_object);
3637                         VM_PAGE_FREE(dst_top_page);
3638                         vm_object_paging_end(dst_object);
3639                         vm_object_unlock(dst_object);
3640                 }
3641
3642         RetrySourceFault: ;
3643
3644                 if (src_object == VM_OBJECT_NULL) {
3645                         /*
3646                          *      No source object.  We will just
3647                          *      zero-fill the page in dst_object.
3648                          */
3649                         src_page = VM_PAGE_NULL;
3650                         result_page = VM_PAGE_NULL;
3651                 } else {
3652                         vm_object_lock(src_object);
3653                         src_page = vm_page_lookup(src_object,
3654                                                   trunc_page_64(src_offset));
3655                         if (src_page == dst_page) {
3656                                 src_prot = dst_prot;
3657                                 result_page = VM_PAGE_NULL;
3658                         } else {
3659                                 src_prot = VM_PROT_READ;
3660                                 vm_object_paging_begin(src_object);
3661
3662                                 XPR(XPR_VM_FAULT,
3663                                         "vm_fault_copy(2) -> vm_fault_page\n",
3664                                         0,0,0,0,0);
3665                                 switch (vm_fault_page(src_object,
3666                                                       trunc_page_64(src_offset),
3667                                                       VM_PROT_READ,
3668                                                       FALSE,
3669                                                       interruptible,
3670                                                       src_lo_offset,
3671                                                       src_hi_offset,
3672                                                       VM_BEHAVIOR_SEQUENTIAL,
3673                                                       &src_prot,
3674                                                       &result_page,
3675                                                       &src_top_page,
3676                                                       (int *)0,
3677                                                       &error,
3678                                                       FALSE,
3679                                                       FALSE, NULL, 0)) {
3680
3681                                 case VM_FAULT_SUCCESS:
3682                                         break;
3683                                 case VM_FAULT_RETRY:
3684                                         goto RetrySourceFault;
3685                                 case VM_FAULT_MEMORY_SHORTAGE:
3686                                         if (vm_page_wait(interruptible))
3687                                                 goto RetrySourceFault;
3688                                         /* fall thru */
3689                                 case VM_FAULT_INTERRUPTED:
3690                                         vm_fault_copy_dst_cleanup(dst_page);
3691                                         RETURN(MACH_SEND_INTERRUPTED);
3692                                 case VM_FAULT_FICTITIOUS_SHORTAGE:
3693                                         vm_page_more_fictitious();
3694                                         goto RetrySourceFault;
3695                                 case VM_FAULT_MEMORY_ERROR:
3696                                         vm_fault_copy_dst_cleanup(dst_page);
3697                                         if (error)
3698                                                 return (error);
3699                                         else
3700                                                 return(KERN_MEMORY_ERROR);
3701                                 }
3702
3703
3704                                 assert((src_top_page == VM_PAGE_NULL) ==
3705                                        (result_page->object == src_object));
3706                         }
3707                         assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3708                         vm_object_unlock(result_page->object);
3709                 }
3710
3711                 if (!vm_map_verify(dst_map, dst_version)) {
3712                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3713                                 vm_fault_copy_cleanup(result_page, src_top_page);
3714                         vm_fault_copy_dst_cleanup(dst_page);
3715                         break;
3716                 }
3717
3718                 vm_object_lock(dst_page->object);
3719
3720                 if (dst_page->object->copy != old_copy_object) {
3721                         vm_object_unlock(dst_page->object);
3722                         vm_map_verify_done(dst_map, dst_version);
3723                         if (result_page != VM_PAGE_NULL && src_page != dst_page)
3724                                 vm_fault_copy_cleanup(result_page, src_top_page);
3725                         vm_fault_copy_dst_cleanup(dst_page);
3726                         break;
3727                 }
3728                 vm_object_unlock(dst_page->object);
3729
3730                 /*
3731                  *      Copy the page, and note that it is dirty
3732                  *      immediately.
3733                  */
3734
3735                 if (!page_aligned(src_offset) ||
3736                         !page_aligned(dst_offset) ||
3737                         !page_aligned(amount_left)) {
3738
3739                         vm_object_offset_t      src_po,
3740                                                 dst_po;
3741
3742                         src_po = src_offset - trunc_page_64(src_offset);
3743                         dst_po = dst_offset - trunc_page_64(dst_offset);
3744
3745                         if (dst_po > src_po) {
3746                                 part_size = PAGE_SIZE - dst_po;
3747                         } else {
3748                                 part_size = PAGE_SIZE - src_po;
3749                         }
3750                         if (part_size > (amount_left)){
3751                                 part_size = amount_left;
3752                         }
3753
3754                         if (result_page == VM_PAGE_NULL) {
3755                                 vm_page_part_zero_fill(dst_page,
3756                                                         dst_po, part_size);
3757                         } else {
3758                                 vm_page_part_copy(result_page, src_po,
3759                                         dst_page, dst_po, part_size);
3760                                 if(!dst_page->dirty){
3761                                         vm_object_lock(dst_object);
3762                                         dst_page->dirty = TRUE;
3763                                         vm_object_unlock(dst_page->object);
3764                                 }
3765
3766                         }
3767                 } else {
3768                         part_size = PAGE_SIZE;
3769
3770                         if (result_page == VM_PAGE_NULL)
3771                                 vm_page_zero_fill(dst_page);
3772                         else{
3773                                 vm_page_copy(result_page, dst_page);
3774                                 if(!dst_page->dirty){
3775                                         vm_object_lock(dst_object);
3776                                         dst_page->dirty = TRUE;
3777                                         vm_object_unlock(dst_page->object);
3778                                 }
3779                         }
3780
3781                 }
3782
3783                 /*
3784                  *      Unlock everything, and return
3785                  */
3786
3787                 vm_map_verify_done(dst_map, dst_version);
3788
3789                 if (result_page != VM_PAGE_NULL && src_page != dst_page)
3790                         vm_fault_copy_cleanup(result_page, src_top_page);
3791                 vm_fault_copy_dst_cleanup(dst_page);
3792
3793                 amount_left -= part_size;
3794                 src_offset += part_size;
3795                 dst_offset += part_size;
3796         } while (amount_left > 0);
3797
3798         RETURN(KERN_SUCCESS);
3799 #undef  RETURN
3800
3801         /*NOTREACHED*/
3802 }
3803
3804 #ifdef  notdef
3805
3806 /*
3807  *      Routine:        vm_fault_page_overwrite
3808  *
3809  *      Description:
3810  *              A form of vm_fault_page that assumes that the
3811  *              resulting page will be overwritten in its entirety,
3812  *              making it unnecessary to obtain the correct *contents*
3813  *              of the page.
3814  *
3815  *      Implementation:
3816  *              XXX Untested.  Also unused.  Eventually, this technology
3817  *              could be used in vm_fault_copy() to advantage.
3818  */
3819 vm_fault_return_t
3820 vm_fault_page_overwrite(
3821         register
3822         vm_object_t             dst_object,
3823         vm_object_offset_t      dst_offset,
3824         vm_page_t               *result_page)   /* OUT */
3825 {
3826         register
3827         vm_page_t       dst_page;
3828         kern_return_t   wait_result;
3829
3830 #define interruptible   THREAD_UNINT    /* XXX */
3831
3832         while (TRUE) {
3833                 /*
3834                  *      Look for a page at this offset
3835                  */
3836
3837                 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3838                                  == VM_PAGE_NULL) {
3839                         /*
3840                          *      No page, no problem... just allocate one.
3841                          */
3842
3843                         dst_page = vm_page_alloc(dst_object, dst_offset);
3844                         if (dst_page == VM_PAGE_NULL) {
3845                                 vm_object_unlock(dst_object);
3846                                 VM_PAGE_WAIT();
3847                                 vm_object_lock(dst_object);
3848                                 continue;
3849                         }
3850
3851                         /*
3852                          *      Pretend that the memory manager
3853                          *      write-protected the page.
3854                          *
3855                          *      Note that we will be asking for write
3856                          *      permission without asking for the data
3857                          *      first.
3858                          */
3859
3860                         dst_page->overwriting = TRUE;
3861                         dst_page->page_lock = VM_PROT_WRITE;
3862                         dst_page->absent = TRUE;
3863                         dst_page->unusual = TRUE;
3864                         dst_object->absent_count++;
3865
3866                         break;
3867
3868                         /*
3869                          *      When we bail out, we might have to throw
3870                          *      away the page created here.
3871                          */
3872
3873 #define DISCARD_PAGE                                            \
3874         MACRO_BEGIN                                             \
3875         vm_object_lock(dst_object);                             \
3876         dst_page = vm_page_lookup(dst_object, dst_offset);      \
3877         if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3878                 VM_PAGE_FREE(dst_page);                         \
3879         vm_object_unlock(dst_object);                           \
3880         MACRO_END
3881                 }
3882
3883                 /*
3884                  *      If the page is write-protected...
3885                  */
3886
3887                 if (dst_page->page_lock & VM_PROT_WRITE) {
3888                         /*
3889                          *      ... and an unlock request hasn't been sent
3890                          */
3891
3892                         if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3893                                 vm_prot_t       u;
3894                                 kern_return_t   rc;
3895
3896                                 /*
3897                                  *      ... then send one now.
3898                                  */
3899
3900                                 if (!dst_object->pager_ready) {
3901                                         wait_result = vm_object_assert_wait(dst_object,
3902                                                                 VM_OBJECT_EVENT_PAGER_READY,
3903                                                                 interruptible);
3904                                         vm_object_unlock(dst_object);
3905                                         if (wait_result == THREAD_WAITING)
3906                                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
3907                                         if (wait_result != THREAD_AWAKENED) {
3908                                                 DISCARD_PAGE;
3909                                                 return(VM_FAULT_INTERRUPTED);
3910                                         }
3911                                         continue;
3912                                 }
3913
3914                                 u = dst_page->unlock_request |= VM_PROT_WRITE;
3915                                 vm_object_unlock(dst_object);
3916
3917                                 if ((rc = memory_object_data_unlock(
3918                                                 dst_object->pager,
3919                                                 dst_offset + dst_object->paging_offset,
3920                                                 PAGE_SIZE,
3921                                                 u)) != KERN_SUCCESS) {
3922                                         if (vm_fault_debug)
3923                                             printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3924                                         DISCARD_PAGE;
3925                                         return((rc == MACH_SEND_INTERRUPTED) ?
3926                                                 VM_FAULT_INTERRUPTED :
3927                                                 VM_FAULT_MEMORY_ERROR);
3928                                 }
3929                                 vm_object_lock(dst_object);
3930                                 continue;
3931                         }
3932
3933                         /* ... fall through to wait below */
3934                 } else {
3935                         /*
3936                          *      If the page isn't being used for other
3937                          *      purposes, then we're done.
3938                          */
3939                         if ( ! (dst_page->busy || dst_page->absent ||
3940                                 dst_page->error || dst_page->restart) )
3941                                 break;
3942                 }
3943
3944                 wait_result = PAGE_ASSERT_WAIT(dst_page, interruptible);
3945                 vm_object_unlock(dst_object);
3946                 if (wait_result == THREAD_WAITING)
3947                         wait_result = thread_block(THREAD_CONTINUE_NULL);
3948                 if (wait_result != THREAD_AWAKENED) {
3949                         DISCARD_PAGE;
3950                         return(VM_FAULT_INTERRUPTED);
3951                 }
3952         }
3953
3954         *result_page = dst_page;
3955         return(VM_FAULT_SUCCESS);
3956
3957 #undef  interruptible
3958 #undef  DISCARD_PAGE
3959 }
3960
3961 #endif  /* notdef */
3962
3963 #if     VM_FAULT_CLASSIFY
3964 /*
3965  *      Temporary statistics gathering support.
3966  */
3967
3968 /*
3969  *      Statistics arrays:
3970  */
3971 #define VM_FAULT_TYPES_MAX      5
3972 #define VM_FAULT_LEVEL_MAX      8
3973
3974 int     vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
3975
3976 #define VM_FAULT_TYPE_ZERO_FILL 0
3977 #define VM_FAULT_TYPE_MAP_IN    1
3978 #define VM_FAULT_TYPE_PAGER     2
3979 #define VM_FAULT_TYPE_COPY      3
3980 #define VM_FAULT_TYPE_OTHER     4
3981
3982
3983 void
3984 vm_fault_classify(vm_object_t           object,
3985                   vm_object_offset_t    offset,
3986                   vm_prot_t             fault_type)
3987 {
3988         int             type, level = 0;
3989         vm_page_t       m;
3990
3991         while (TRUE) {
3992                 m = vm_page_lookup(object, offset);
3993                 if (m != VM_PAGE_NULL) {
3994                         if (m->busy || m->error || m->restart || m->absent ||
3995                             fault_type & m->page_lock) {
3996                                 type = VM_FAULT_TYPE_OTHER;
3997                                 break;
3998                         }
3999                         if (((fault_type & VM_PROT_WRITE) == 0) ||
4000                             ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4001                                 type = VM_FAULT_TYPE_MAP_IN;
4002                                 break;
4003                         }
4004                         type = VM_FAULT_TYPE_COPY;
4005                         break;
4006                 }
4007                 else {
4008                         if (object->pager_created) {
4009                                 type = VM_FAULT_TYPE_PAGER;
4010                                 break;
4011                         }
4012                         if (object->shadow == VM_OBJECT_NULL) {
4013                                 type = VM_FAULT_TYPE_ZERO_FILL;
4014                                 break;
4015                         }
4016
4017                         offset += object->shadow_offset;
4018                         object = object->shadow;
4019                         level++;
4020                         continue;
4021                 }
4022         }
4023
4024         if (level > VM_FAULT_LEVEL_MAX)
4025                 level = VM_FAULT_LEVEL_MAX;
4026
4027         vm_fault_stats[type][level] += 1;
4028
4029         return;
4030 }
4031
4032 /* cleanup routine to call from debugger */
4033
4034 void
4035 vm_fault_classify_init(void)
4036 {
4037         int type, level;
4038
4039         for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4040                 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4041                         vm_fault_stats[type][level] = 0;
4042                 }
4043         }
4044
4045         return;
4046 }
4047 #endif  /* VM_FAULT_CLASSIFY */