1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71 #include <mach_kdb.h>
72 #include <advisory_pageout.h>
73
74 #include <mach/mach_types.h>
75 #include <mach/memory_object.h>
76 #include <mach/memory_object_default.h>
77 #include <mach/memory_object_control_server.h>
78 #include <mach/mach_host_server.h>
79 #include <mach/upl.h>
80 #include <mach/vm_map.h>
81 #include <mach/vm_param.h>
82 #include <mach/vm_statistics.h>
83
84 #include <kern/kern_types.h>
85 #include <kern/counters.h>
86 #include <kern/host_statistics.h>
87 #include <kern/machine.h>
88 #include <kern/misc_protos.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92
93 #include <machine/vm_tuning.h>
94
95 #include <vm/pmap.h>
96 #include <vm/vm_fault.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_object.h>
99 #include <vm/vm_page.h>
100 #include <vm/vm_pageout.h>
101 #include <vm/vm_protos.h> /* must be last */
102
103 /*
104 * ENCRYPTED SWAP:
105 */
106 #ifdef __ppc__
107 #include <ppc/mappings.h>
108 #endif /* __ppc__ */
109 #include <../bsd/crypto/aes/aes.h>
110
111 extern ipc_port_t memory_manager_default;
112
113
114 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
115 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000 /* maximum iterations of the active queue to move pages to inactive */
116 #endif
117
118 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
119 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
120 #endif
121
122 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
123 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
124 #endif
125
126 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
127 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
128 #endif
129
130 #ifndef VM_PAGE_LAUNDRY_MAX
131 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
132 #endif /* VM_PAGE_LAUNDRY_MAX */
133
134 #ifndef VM_PAGEOUT_BURST_WAIT
135 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
136 #endif /* VM_PAGEOUT_BURST_WAIT */
137
138 #ifndef VM_PAGEOUT_EMPTY_WAIT
139 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
140 #endif /* VM_PAGEOUT_EMPTY_WAIT */
141
142 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
143 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
144 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
145
146 #ifndef VM_PAGEOUT_IDLE_WAIT
147 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
148 #endif /* VM_PAGEOUT_IDLE_WAIT */
149
150
151 /*
152 * To obtain a reasonable LRU approximation, the inactive queue
153 * needs to be large enough to give pages on it a chance to be
154 * referenced a second time. This macro defines the fraction
155 * of active+inactive pages that should be inactive.
156 * The pageout daemon uses it to update vm_page_inactive_target.
157 *
158 * If vm_page_free_count falls below vm_page_free_target and
159 * vm_page_inactive_count is below vm_page_inactive_target,
160 * then the pageout daemon starts running.
161 */
162
163 #ifndef VM_PAGE_INACTIVE_TARGET
164 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
165 #endif /* VM_PAGE_INACTIVE_TARGET */
166
167 /*
168 * Once the pageout daemon starts running, it keeps going
169 * until vm_page_free_count meets or exceeds vm_page_free_target.
170 */
171
172 #ifndef VM_PAGE_FREE_TARGET
173 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
174 #endif /* VM_PAGE_FREE_TARGET */
175
176 /*
177 * The pageout daemon always starts running once vm_page_free_count
178 * falls below vm_page_free_min.
179 */
180
181 #ifndef VM_PAGE_FREE_MIN
182 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
183 #endif /* VM_PAGE_FREE_MIN */
184
185 /*
186 * When vm_page_free_count falls below vm_page_free_reserved,
187 * only vm-privileged threads can allocate pages. vm-privilege
188 * allows the pageout daemon and default pager (and any other
189 * associated threads needed for default pageout) to continue
190 * operation by dipping into the reserved pool of pages.
191 */
192
193 #ifndef VM_PAGE_FREE_RESERVED
194 #define VM_PAGE_FREE_RESERVED(n) \
195 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
196 #endif /* VM_PAGE_FREE_RESERVED */
197
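/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): how the threshold macros above relate for a hypothetical
 * pool of pages.  The function name and the page counts are made up
 * for illustration; the arithmetic simply applies the macros.
 */
#if 0
static void
vm_pageout_threshold_example(void)
{
	unsigned int free_after_reserve = 100000;	/* hypothetical pool */

	/* 15 + 100000/80  = 1265 pages before the daemon may stop */
	unsigned int free_target = VM_PAGE_FREE_TARGET(free_after_reserve);

	/* 10 + 100000/100 = 1010 pages; below this the daemon always runs */
	unsigned int free_min = VM_PAGE_FREE_MIN(free_after_reserve);

	/* 6 * 16 + 0 = 96 pages kept for vm-privileged threads */
	unsigned int free_reserved = VM_PAGE_FREE_RESERVED(0);

	/* one third of 300000 active+inactive pages should be inactive */
	unsigned int inactive_target = VM_PAGE_INACTIVE_TARGET(300000);

	(void)free_target; (void)free_min;
	(void)free_reserved; (void)inactive_target;
}
#endif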
198
199 /*
200 * must hold the page queues lock to
201 * manipulate this structure
202 */
203 struct vm_pageout_queue {
204 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
205 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
206 unsigned int pgo_maxlaundry;
207
208 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
209 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
210 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
211 :0;
212 };
213
214 #define VM_PAGE_Q_THROTTLED(q) \
215 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
216
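/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the intended interplay between VM_PAGE_Q_THROTTLED() and
 * the pgo_ flags.  A producer such as vm_pageout_scan() checks the
 * throttle before adding laundry and asks for a wakeup when the
 * queue is full; the completion path (vm_pageout_throttle_up(),
 * later in this file) decrements pgo_laundry and delivers it.  The
 * function name is hypothetical, locking is elided, and the real
 * code uses a timed wait while holding the page queues lock.
 */
#if 0
static void
vm_pageout_queue_throttle_example(struct vm_pageout_queue *q)
{
	if (VM_PAGE_Q_THROTTLED(q)) {
		/* too much laundry in flight: request a wakeup and block */
		q->pgo_throttled = TRUE;
		assert_wait((event_t) &q->pgo_laundry, THREAD_INTERRUPTIBLE);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		/* room left on the queue: account for one more laundry page */
		q->pgo_laundry++;
	}
}
#endif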
217
218 /*
219 * Exported variable used to broadcast the activation of the pageout scan.
220 * Working Set uses this to throttle its use of pmap removes. In this
221 * way, code which runs within memory in an uncontested context does
222 * not keep encountering soft faults.
223 */
224
225 unsigned int vm_pageout_scan_event_counter = 0;
226
227 /*
228 * Forward declarations for internal routines.
229 */
230
231 static void vm_pageout_garbage_collect(int);
232 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
233 static void vm_pageout_iothread_external(void);
234 static void vm_pageout_iothread_internal(void);
235 static void vm_pageout_queue_steal(vm_page_t);
236
237 extern void vm_pageout_continue(void);
238 extern void vm_pageout_scan(void);
239
240 unsigned int vm_pageout_reserved_internal = 0;
241 unsigned int vm_pageout_reserved_really = 0;
242
243 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
244 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
245 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
246 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
247 unsigned int vm_pageout_deadlock_relief = 0;
248 unsigned int vm_pageout_inactive_relief = 0;
249 unsigned int vm_pageout_burst_active_throttle = 0;
250 unsigned int vm_pageout_burst_inactive_throttle = 0;
251
252 /*
253 * Protection against zero fill flushing live working sets derived
254 * from existing backing store and files
255 */
256 unsigned int vm_accellerate_zf_pageout_trigger = 400;
257 unsigned int vm_zf_iterator;
258 unsigned int vm_zf_iterator_count = 40;
259 unsigned int last_page_zf;
260 unsigned int vm_zf_count = 0;
261
262 /*
263 * These variables record the pageout daemon's actions:
264 * how many pages it looks at and what happens to those pages.
265 * No locking needed because only one thread modifies the variables.
266 */
267
268 unsigned int vm_pageout_active = 0; /* debugging */
269 unsigned int vm_pageout_inactive = 0; /* debugging */
270 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
271 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
272 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
273 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
274 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
275 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
276 unsigned int vm_pageout_inactive_used = 0; /* debugging */
277 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
278 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
279 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
280 unsigned int vm_pageout_purged_objects = 0; /* debugging */
281 unsigned int vm_stat_discard = 0; /* debugging */
282 unsigned int vm_stat_discard_sent = 0; /* debugging */
283 unsigned int vm_stat_discard_failure = 0; /* debugging */
284 unsigned int vm_stat_discard_throttle = 0; /* debugging */
285
286 unsigned int vm_pageout_scan_active_throttled = 0;
287 unsigned int vm_pageout_scan_inactive_throttled = 0;
288 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
289 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
290 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
291 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
292 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
293 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
294 /*
295 * Backing store throttle when BS is exhausted
296 */
297 unsigned int vm_backing_store_low = 0;
298
299 unsigned int vm_pageout_out_of_line = 0;
300 unsigned int vm_pageout_in_place = 0;
301
302 /*
303 * ENCRYPTED SWAP:
304 * counters and statistics...
305 */
306 unsigned long vm_page_decrypt_counter = 0;
307 unsigned long vm_page_decrypt_for_upl_counter = 0;
308 unsigned long vm_page_encrypt_counter = 0;
309 unsigned long vm_page_encrypt_abort_counter = 0;
310 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
311 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
312
313
314 struct vm_pageout_queue vm_pageout_queue_internal;
315 struct vm_pageout_queue vm_pageout_queue_external;
316
317
318 /*
319 * Routine: vm_backing_store_disable
320 * Purpose:
321 * Suspend non-privileged threads wishing to extend
322 * backing store when we are low on backing store
323 * (Synchronized by caller)
324 */
325 void
326 vm_backing_store_disable(
327 boolean_t disable)
328 {
329 if(disable) {
330 vm_backing_store_low = 1;
331 } else {
332 if(vm_backing_store_low) {
333 vm_backing_store_low = 0;
334 thread_wakeup((event_t) &vm_backing_store_low);
335 }
336 }
337 }
338
339
340 /*
341 * Routine: vm_pageout_object_allocate
342 * Purpose:
343 * Allocate an object for use as out-of-line memory in a
344 * data_return/data_initialize message.
345 * The page must be in an unlocked object.
346 *
347 * If the page belongs to a trusted pager, cleaning in place
348 * will be used, which utilizes a special "pageout object"
349 * containing private alias pages for the real page frames.
350 * Untrusted pagers use normal out-of-line memory.
351 */
352 vm_object_t
353 vm_pageout_object_allocate(
354 vm_page_t m,
355 vm_size_t size,
356 vm_object_offset_t offset)
357 {
358 vm_object_t object = m->object;
359 vm_object_t new_object;
360
361 assert(object->pager_ready);
362
363 new_object = vm_object_allocate(size);
364
365 if (object->pager_trusted) {
366 assert (offset < object->size);
367
368 vm_object_lock(new_object);
369 new_object->pageout = TRUE;
370 new_object->shadow = object;
371 new_object->can_persist = FALSE;
372 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
373 new_object->shadow_offset = offset;
374 vm_object_unlock(new_object);
375
376 /*
377 * Take a paging reference on the object. This will be dropped
378 * in vm_pageout_object_terminate()
379 */
380 vm_object_lock(object);
381 vm_object_paging_begin(object);
382 vm_page_lock_queues();
383 vm_page_unlock_queues();
384 vm_object_unlock(object);
385
386 vm_pageout_in_place++;
387 } else
388 vm_pageout_out_of_line++;
389 return(new_object);
390 }
391
392 #if MACH_CLUSTER_STATS
393 unsigned long vm_pageout_cluster_dirtied = 0;
394 unsigned long vm_pageout_cluster_cleaned = 0;
395 unsigned long vm_pageout_cluster_collisions = 0;
396 unsigned long vm_pageout_cluster_clusters = 0;
397 unsigned long vm_pageout_cluster_conversions = 0;
398 unsigned long vm_pageout_target_collisions = 0;
399 unsigned long vm_pageout_target_page_dirtied = 0;
400 unsigned long vm_pageout_target_page_freed = 0;
401 #define CLUSTER_STAT(clause) clause
402 #else /* MACH_CLUSTER_STATS */
403 #define CLUSTER_STAT(clause)
404 #endif /* MACH_CLUSTER_STATS */
405
406 /*
407 * Routine: vm_pageout_object_terminate
408 * Purpose:
409 * Destroy the pageout_object allocated by
410 * vm_pageout_object_allocate(), and perform all of the
411 * required cleanup actions.
412 *
413 * In/Out conditions:
414 * The object must be locked, and will be returned locked.
415 */
416 void
417 vm_pageout_object_terminate(
418 vm_object_t object)
419 {
420 vm_object_t shadow_object;
421 boolean_t shadow_internal;
422
423 /*
424 * Deal with the deallocation (last reference) of a pageout object
425 * (used for cleaning-in-place) by dropping the paging references/
426 * freeing pages in the original object.
427 */
428
429 assert(object->pageout);
430 shadow_object = object->shadow;
431 vm_object_lock(shadow_object);
432 shadow_internal = shadow_object->internal;
433
434 while (!queue_empty(&object->memq)) {
435 vm_page_t p, m;
436 vm_object_offset_t offset;
437
438 p = (vm_page_t) queue_first(&object->memq);
439
440 assert(p->private);
441 assert(p->pageout);
442 p->pageout = FALSE;
443 assert(!p->cleaning);
444
445 offset = p->offset;
446 VM_PAGE_FREE(p);
447 p = VM_PAGE_NULL;
448
449 m = vm_page_lookup(shadow_object,
450 offset + object->shadow_offset);
451
452 if(m == VM_PAGE_NULL)
453 continue;
454 assert(m->cleaning);
455 /* used as a trigger on upl_commit etc to recognize the */
456 /* pageout daemon's subsequent desire to pageout a cleaning */
457 /* page. When the bit is on the upl commit code will */
458 /* respect the pageout bit in the target page over the */
459 /* caller's page list indication */
460 m->dump_cleaning = FALSE;
461
462 /*
463 * Account for the paging reference taken when
464 * m->cleaning was set on this page.
465 */
466 vm_object_paging_end(shadow_object);
467 assert((m->dirty) || (m->precious) ||
468 (m->busy && m->cleaning));
469
470 /*
471 * Handle the trusted pager throttle.
472 * Also decrement the burst throttle (if external).
473 */
474 vm_page_lock_queues();
475 if (m->laundry) {
476 vm_pageout_throttle_up(m);
477 }
478
479 /*
480 * Handle the "target" page(s). These pages are to be freed if
481 * successfully cleaned. Target pages are always busy, and are
482 * wired exactly once. The initial target pages are not mapped,
483 * (so cannot be referenced or modified) but converted target
484 * pages may have been modified between the selection as an
485 * adjacent page and conversion to a target.
486 */
487 if (m->pageout) {
488 assert(m->busy);
489 assert(m->wire_count == 1);
490 m->cleaning = FALSE;
491 m->pageout = FALSE;
492 #if MACH_CLUSTER_STATS
493 if (m->wanted) vm_pageout_target_collisions++;
494 #endif
495 /*
496 * Revoke all access to the page. Since the object is
497 * locked, and the page is busy, this prevents the page
498 * from being dirtied after the pmap_disconnect() call
499 * returns.
500 *
501 * Since the page is left "dirty" but "not modified", we
502 * can detect whether the page was redirtied during
503 * pageout by checking the modify state.
504 */
505 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
506 m->dirty = TRUE;
507 else
508 m->dirty = FALSE;
509
510 if (m->dirty) {
511 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
512 vm_page_unwire(m);/* reactivates */
513 VM_STAT(reactivations++);
514 PAGE_WAKEUP_DONE(m);
515 } else {
516 CLUSTER_STAT(vm_pageout_target_page_freed++;)
517 vm_page_free(m);/* clears busy, etc. */
518 }
519 vm_page_unlock_queues();
520 continue;
521 }
522 /*
523 * Handle the "adjacent" pages. These pages were cleaned in
524 * place, and should be left alone.
525 * If the page is not already on a queue, reactivate it
526 * if it was referenced; otherwise deactivate it.
527 */
528 if (!m->active && !m->inactive && !m->private) {
529 if (m->reference)
530 vm_page_activate(m);
531 else
532 vm_page_deactivate(m);
533 }
534 if((m->busy) && (m->cleaning)) {
535
536 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
537 m->busy = FALSE;
538
539 /* We do not re-set m->dirty ! */
540 /* The page was busy so no extraneous activity */
541 /* could have occurred. COPY_INTO is a read into the */
542 /* new pages. CLEAN_IN_PLACE does actually write */
543 /* out the pages but handling outside of this code */
544 /* will take care of resetting dirty. We clear the */
545 /* modify however for the Programmed I/O case. */
546 pmap_clear_modify(m->phys_page);
547 if(m->absent) {
548 m->absent = FALSE;
549 if(shadow_object->absent_count == 1)
550 vm_object_absent_release(shadow_object);
551 else
552 shadow_object->absent_count--;
553 }
554 m->overwriting = FALSE;
555 } else if (m->overwriting) {
556 /* alternate request page list, write to page_list */
557 /* case. Occurs when the original page was wired */
558 /* at the time of the list request */
559 assert(m->wire_count != 0);
560 vm_page_unwire(m);/* reactivates */
561 m->overwriting = FALSE;
562 } else {
563 /*
564 * Set the dirty state according to whether or not the page was
565 * modified during the pageout. Note that we purposefully do
566 * NOT call pmap_clear_modify since the page is still mapped.
567 * If the page were to be dirtied between the 2 calls, this
568 * fact would be lost. This code is only necessary to
569 * maintain statistics, since the pmap module is always
570 * consulted if m->dirty is false.
571 */
572 #if MACH_CLUSTER_STATS
573 m->dirty = pmap_is_modified(m->phys_page);
574
575 if (m->dirty) vm_pageout_cluster_dirtied++;
576 else vm_pageout_cluster_cleaned++;
577 if (m->wanted) vm_pageout_cluster_collisions++;
578 #else
579 m->dirty = 0;
580 #endif
581 }
582 m->cleaning = FALSE;
583
584 /*
585 * Wakeup any thread waiting for the page to be un-cleaning.
586 */
587 PAGE_WAKEUP(m);
588 vm_page_unlock_queues();
589 }
590 /*
591 * Account for the paging reference taken in vm_pageout_object_allocate.
592 */
593 vm_object_paging_end(shadow_object);
594 vm_object_unlock(shadow_object);
595
596 assert(object->ref_count == 0);
597 assert(object->paging_in_progress == 0);
598 assert(object->resident_page_count == 0);
599 return;
600 }
601
602 /*
603 * Routine: vm_pageout_setup
604 * Purpose:
605 * Set up a page for pageout (clean & flush).
606 *
607 * Move the page to a new object, as part of which it will be
608 * sent to its memory manager in a memory_object_data_write or
609 * memory_object_initialize message.
610 *
611 * The "new_object" and "new_offset" arguments
612 * indicate where the page should be moved.
613 *
614 * In/Out conditions:
615 * The page in question must not be on any pageout queues,
616 * and must be busy. The object to which it belongs
617 * must be unlocked, and the caller must hold a paging
618 * reference to it. The new_object must not be locked.
619 *
620 * This routine returns a pointer to a place-holder page,
621 * inserted at the same offset, to block out-of-order
622 * requests for the page. The place-holder page must
623 * be freed after the data_write or initialize message
624 * has been sent.
625 *
626 * The original page is put on a paging queue and marked
627 * not busy on exit.
628 */
629 vm_page_t
630 vm_pageout_setup(
631 register vm_page_t m,
632 register vm_object_t new_object,
633 vm_object_offset_t new_offset)
634 {
635 register vm_object_t old_object = m->object;
636 vm_object_offset_t paging_offset;
637 vm_object_offset_t offset;
638 register vm_page_t holding_page;
639 register vm_page_t new_m;
640 boolean_t need_to_wire = FALSE;
641
642
643 XPR(XPR_VM_PAGEOUT,
644 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
645 (integer_t)m->object, (integer_t)m->offset,
646 (integer_t)m, (integer_t)new_object,
647 (integer_t)new_offset);
648 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
649 !m->restart);
650
651 assert(m->dirty || m->precious);
652
653 /*
654 * Create a place-holder page where the old one was, to prevent
655 * attempted pageins of this page while we're unlocked.
656 */
657 VM_PAGE_GRAB_FICTITIOUS(holding_page);
658
659 vm_object_lock(old_object);
660
661 offset = m->offset;
662 paging_offset = offset + old_object->paging_offset;
663
664 if (old_object->pager_trusted) {
665 /*
666 * This pager is trusted, so we can clean this page
667 * in place. Leave it in the old object, and mark it
668 * cleaning & pageout.
669 */
670 new_m = holding_page;
671 holding_page = VM_PAGE_NULL;
672
673 /*
674 * Set up new page to be private shadow of real page.
675 */
676 new_m->phys_page = m->phys_page;
677 new_m->fictitious = FALSE;
678 new_m->pageout = TRUE;
679
680 /*
681 * Mark real page as cleaning (indicating that we hold a
682 * paging reference to be released via m_o_d_r_c) and
683 * pageout (indicating that the page should be freed
684 * when the pageout completes).
685 */
686 pmap_clear_modify(m->phys_page);
687 vm_page_lock_queues();
688 new_m->private = TRUE;
689 vm_page_wire(new_m);
690 m->cleaning = TRUE;
691 m->pageout = TRUE;
692
693 vm_page_wire(m);
694 assert(m->wire_count == 1);
695 vm_page_unlock_queues();
696
697 m->dirty = TRUE;
698 m->precious = FALSE;
699 m->page_lock = VM_PROT_NONE;
700 m->unusual = FALSE;
701 m->unlock_request = VM_PROT_NONE;
702 } else {
703 /*
704 * Cannot clean in place, so rip the old page out of the
705 * object, and stick the holding page in. Set new_m to the
706 * page in the new object.
707 */
708 vm_page_lock_queues();
709 VM_PAGE_QUEUES_REMOVE(m);
710 vm_page_remove(m);
711
712 vm_page_insert(holding_page, old_object, offset);
713 vm_page_unlock_queues();
714
715 m->dirty = TRUE;
716 m->precious = FALSE;
717 new_m = m;
718 new_m->page_lock = VM_PROT_NONE;
719 new_m->unlock_request = VM_PROT_NONE;
720
721 if (old_object->internal)
722 need_to_wire = TRUE;
723 }
724 /*
725 * Record that this page has been written out
726 */
727 #if MACH_PAGEMAP
728 vm_external_state_set(old_object->existence_map, offset);
729 #endif /* MACH_PAGEMAP */
730
731 vm_object_unlock(old_object);
732
733 vm_object_lock(new_object);
734
735 /*
736 * Put the page into the new object. If it is not wired
737 * (i.e., if it is the real page), it will be activated.
738 */
739
740 vm_page_lock_queues();
741 vm_page_insert(new_m, new_object, new_offset);
742 if (need_to_wire)
743 vm_page_wire(new_m);
744 else
745 vm_page_activate(new_m);
746 PAGE_WAKEUP_DONE(new_m);
747 vm_page_unlock_queues();
748
749 vm_object_unlock(new_object);
750
751 /*
752 * Return the placeholder page to simplify cleanup.
753 */
754 return (holding_page);
755 }
756
757 /*
758 * Routine: vm_pageclean_setup
759 *
760 * Purpose: setup a page to be cleaned (made non-dirty), but not
761 * necessarily flushed from the VM page cache.
762 * This is accomplished by cleaning in place.
763 *
764 * The page must not be busy, and the object and page
765 * queues must be locked.
766 *
767 */
768 void
769 vm_pageclean_setup(
770 vm_page_t m,
771 vm_page_t new_m,
772 vm_object_t new_object,
773 vm_object_offset_t new_offset)
774 {
775 vm_object_t old_object = m->object;
776 assert(!m->busy);
777 assert(!m->cleaning);
778
779 XPR(XPR_VM_PAGEOUT,
780 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
781 (integer_t)old_object, m->offset, (integer_t)m,
782 (integer_t)new_m, new_offset);
783
784 pmap_clear_modify(m->phys_page);
785 vm_object_paging_begin(old_object);
786
787 /*
788 * Record that this page has been written out
789 */
790 #if MACH_PAGEMAP
791 vm_external_state_set(old_object->existence_map, m->offset);
792 #endif /*MACH_PAGEMAP*/
793
794 /*
795 * Mark original page as cleaning in place.
796 */
797 m->cleaning = TRUE;
798 m->dirty = TRUE;
799 m->precious = FALSE;
800
801 /*
802 * Convert the fictitious page to a private shadow of
803 * the real page.
804 */
805 assert(new_m->fictitious);
806 new_m->fictitious = FALSE;
807 new_m->private = TRUE;
808 new_m->pageout = TRUE;
809 new_m->phys_page = m->phys_page;
810 vm_page_wire(new_m);
811
812 vm_page_insert(new_m, new_object, new_offset);
813 assert(!new_m->wanted);
814 new_m->busy = FALSE;
815 }
816
817 void
818 vm_pageclean_copy(
819 vm_page_t m,
820 vm_page_t new_m,
821 vm_object_t new_object,
822 vm_object_offset_t new_offset)
823 {
824 XPR(XPR_VM_PAGEOUT,
825 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
826 m, new_m, new_object, new_offset, 0);
827
828 assert((!m->busy) && (!m->cleaning));
829
830 assert(!new_m->private && !new_m->fictitious);
831
832 pmap_clear_modify(m->phys_page);
833
834 m->busy = TRUE;
835 vm_object_paging_begin(m->object);
836 vm_page_unlock_queues();
837 vm_object_unlock(m->object);
838
839 /*
840 * Copy the original page to the new page.
841 */
842 vm_page_copy(m, new_m);
843
844 /*
845 * Mark the old page as clean. A request to pmap_is_modified
846 * will get the right answer.
847 */
848 vm_object_lock(m->object);
849 m->dirty = FALSE;
850
851 vm_object_paging_end(m->object);
852
853 vm_page_lock_queues();
854 if (!m->active && !m->inactive)
855 vm_page_activate(m);
856 PAGE_WAKEUP_DONE(m);
857
858 vm_page_insert(new_m, new_object, new_offset);
859 vm_page_activate(new_m);
860 new_m->busy = FALSE; /* No other thread can be waiting */
861 }
862
863
864 /*
865 * Routine: vm_pageout_initialize_page
866 * Purpose:
867 * Causes the specified page to be initialized in
868 * the appropriate memory object. This routine is used to push
869 * pages into a copy-object when they are modified in the
870 * permanent object.
871 *
872 * The page is moved to a temporary object and paged out.
873 *
874 * In/out conditions:
875 * The page in question must not be on any pageout queues.
876 * The object to which it belongs must be locked.
877 * The page must be busy, but not hold a paging reference.
878 *
879 * Implementation:
880 * Move this page to a completely new object.
881 */
882 void
883 vm_pageout_initialize_page(
884 vm_page_t m)
885 {
886 vm_object_t object;
887 vm_object_offset_t paging_offset;
888 vm_page_t holding_page;
889
890
891 XPR(XPR_VM_PAGEOUT,
892 "vm_pageout_initialize_page, page 0x%X\n",
893 (integer_t)m, 0, 0, 0, 0);
894 assert(m->busy);
895
896 /*
897 * Verify that we really want to clean this page
898 */
899 assert(!m->absent);
900 assert(!m->error);
901 assert(m->dirty);
902
903 /*
904 * Create a paging reference to let us play with the object.
905 */
906 object = m->object;
907 paging_offset = m->offset + object->paging_offset;
908 vm_object_paging_begin(object);
909 if (m->absent || m->error || m->restart ||
910 (!m->dirty && !m->precious)) {
911 VM_PAGE_FREE(m);
912 panic("reservation without pageout?"); /* alan */
913 vm_object_unlock(object);
914 return;
915 }
916
917 /* set the page for future call to vm_fault_list_request */
918 holding_page = NULL;
919 vm_page_lock_queues();
920 pmap_clear_modify(m->phys_page);
921 m->dirty = TRUE;
922 m->busy = TRUE;
923 m->list_req_pending = TRUE;
924 m->cleaning = TRUE;
925 m->pageout = TRUE;
926 vm_page_wire(m);
927 vm_page_unlock_queues();
928 vm_object_unlock(object);
929
930 /*
931 * Write the data to its pager.
932 * Note that the data is passed by naming the new object,
933 * not a virtual address; the pager interface has been
934 * manipulated to use the "internal memory" data type.
935 * [The object reference from its allocation is donated
936 * to the eventual recipient.]
937 */
938 memory_object_data_initialize(object->pager,
939 paging_offset,
940 PAGE_SIZE);
941
942 vm_object_lock(object);
943 }
944
945 #if MACH_CLUSTER_STATS
946 #define MAXCLUSTERPAGES 16
947 struct {
948 unsigned long pages_in_cluster;
949 unsigned long pages_at_higher_offsets;
950 unsigned long pages_at_lower_offsets;
951 } cluster_stats[MAXCLUSTERPAGES];
952 #endif /* MACH_CLUSTER_STATS */
953
954 boolean_t allow_clustered_pageouts = FALSE;
955
956 /*
957 * vm_pageout_cluster:
958 *
959 * Given a page, queue it to the appropriate I/O thread,
960 * which will page it out and attempt to clean adjacent pages
961 * in the same operation.
962 *
963 * The page must be busy, and the object and queues locked. We will take a
964 * paging reference to prevent deallocation or collapse when we
965 * release the object lock back at the call site. The I/O thread
966 * is responsible for consuming this reference
967 *
968 * The page must not be on any pageout queue.
969 */
970
971 void
972 vm_pageout_cluster(vm_page_t m)
973 {
974 vm_object_t object = m->object;
975 struct vm_pageout_queue *q;
976
977
978 XPR(XPR_VM_PAGEOUT,
979 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
980 (integer_t)object, m->offset, (integer_t)m, 0, 0);
981
982 /*
983 * Only a certain kind of page is appreciated here.
984 */
985 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
986 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
987
988 /*
989 * protect the object from collapse -
990 * locking in the object's paging_offset.
991 */
992 vm_object_paging_begin(object);
993
994 /*
995 * set the page for future call to vm_fault_list_request
996 * page should already be marked busy
997 */
998 vm_page_wire(m);
999 m->list_req_pending = TRUE;
1000 m->cleaning = TRUE;
1001 m->pageout = TRUE;
1002 m->laundry = TRUE;
1003
1004 if (object->internal == TRUE)
1005 q = &vm_pageout_queue_internal;
1006 else
1007 q = &vm_pageout_queue_external;
1008 q->pgo_laundry++;
1009
1010 m->pageout_queue = TRUE;
1011 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
1012
1013 if (q->pgo_idle == TRUE) {
1014 q->pgo_idle = FALSE;
1015 thread_wakeup((event_t) &q->pgo_pending);
1016 }
1017 }
1018
1019
1020 unsigned long vm_pageout_throttle_up_count = 0;
1021
1022 /*
1023 * A page is back from laundry. See if there are some pages waiting to
1024 * go to laundry and if we can let some of them go now.
1025 *
1026 * Object and page queues must be locked.
1027 */
1028 void
1029 vm_pageout_throttle_up(
1030 vm_page_t m)
1031 {
1032 struct vm_pageout_queue *q;
1033
1034 vm_pageout_throttle_up_count++;
1035
1036 assert(m->laundry);
1037 assert(m->object != VM_OBJECT_NULL);
1038 assert(m->object != kernel_object);
1039
1040 if (m->object->internal == TRUE)
1041 q = &vm_pageout_queue_internal;
1042 else
1043 q = &vm_pageout_queue_external;
1044
1045 m->laundry = FALSE;
1046 q->pgo_laundry--;
1047
1048 if (q->pgo_throttled == TRUE) {
1049 q->pgo_throttled = FALSE;
1050 thread_wakeup((event_t) &q->pgo_laundry);
1051 }
1052 }
1053
1054
1055 /*
1056 * vm_pageout_scan does the dirty work for the pageout daemon.
1057 * It returns with vm_page_queue_free_lock held and
1058 * vm_page_free_wanted == 0.
1059 */
1060
1061 #define DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
1062
1063 #define FCS_IDLE 0
1064 #define FCS_DELAYED 1
1065 #define FCS_DEADLOCK_DETECTED 2
1066
1067 struct flow_control {
1068 int state;
1069 mach_timespec_t ts;
1070 };
1071
1072 extern kern_return_t sysclk_gettime(mach_timespec_t *);
1073
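/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): how the flow_control state machine arms and checks its
 * deadlock timer.  The millisecond wait is split into seconds and
 * nanoseconds, added to the current time, and later compared against
 * "now".  The function name is hypothetical; the constants and macro
 * calls mirror the FCS_IDLE/FCS_DELAYED handling in vm_pageout_scan()
 * below.
 */
#if 0
static boolean_t
vm_pageout_deadlock_timer_example(struct flow_control *fc, unsigned int msecs)
{
	mach_timespec_t ts, now;

	switch (fc->state) {
	case FCS_IDLE:
		/* arm the timer: deadline = now + msecs */
		ts.tv_sec  = msecs / 1000;
		ts.tv_nsec = (msecs % 1000) * 1000 * NSEC_PER_USEC;
		sysclk_gettime(&fc->ts);
		ADD_MACH_TIMESPEC(&fc->ts, &ts);
		fc->state = FCS_DELAYED;
		return FALSE;

	case FCS_DELAYED:
		/* deadline passed without laundry relief: suspect a deadlock */
		sysclk_gettime(&now);
		if (CMP_MACH_TIMESPEC(&now, &fc->ts) >= 0) {
			fc->state = FCS_DEADLOCK_DETECTED;
			return TRUE;
		}
		return FALSE;

	default:
		/* FCS_DEADLOCK_DETECTED: relief is handled by the caller */
		return TRUE;
	}
}
#endif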
1074
1075 void
1076 vm_pageout_scan(void)
1077 {
1078 unsigned int loop_count = 0;
1079 unsigned int inactive_burst_count = 0;
1080 unsigned int active_burst_count = 0;
1081 vm_page_t local_freeq = 0;
1082 int local_freed = 0;
1083 int delayed_unlock = 0;
1084 int need_internal_inactive = 0;
1085 int refmod_state = 0;
1086 int vm_pageout_deadlock_target = 0;
1087 struct vm_pageout_queue *iq;
1088 struct vm_pageout_queue *eq;
1089 struct flow_control flow_control;
1090 boolean_t active_throttled = FALSE;
1091 boolean_t inactive_throttled = FALSE;
1092 mach_timespec_t ts;
1093 unsigned int msecs = 0;
1094 vm_object_t object;
1095
1096
1097 flow_control.state = FCS_IDLE;
1098 iq = &vm_pageout_queue_internal;
1099 eq = &vm_pageout_queue_external;
1100
1101 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1102
1103 /*???*/ /*
1104 * We want to gradually dribble pages from the active queue
1105 * to the inactive queue. If we let the inactive queue get
1106 * very small, and then suddenly dump many pages into it,
1107 * those pages won't get a sufficient chance to be referenced
1108 * before we start taking them from the inactive queue.
1109 *
1110 * We must limit the rate at which we send pages to the pagers.
1111 * data_write messages consume memory, for message buffers and
1112 * for map-copy objects. If we get too far ahead of the pagers,
1113 * we can potentially run out of memory.
1114 *
1115 * We can use the laundry count to limit directly the number
1116 * of pages outstanding to the default pager. A similar
1117 * strategy for external pagers doesn't work, because
1118 * external pagers don't have to deallocate the pages sent them,
1119 * and because we might have to send pages to external pagers
1120 * even if they aren't processing writes. So we also
1121 * use a burst count to limit writes to external pagers.
1122 *
1123 * When memory is very tight, we can't rely on external pagers to
1124 * clean pages. They probably aren't running, because they
1125 * aren't vm-privileged. If we kept sending dirty pages to them,
1126 * we could exhaust the free list.
1127 */
1128 vm_page_lock_queues();
1129 delayed_unlock = 1;
1130
1131
1132 Restart:
1133 /*
1134 * Recalculate vm_page_inactive_target.
1135 */
1136 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1137 vm_page_inactive_count);
1138 object = NULL;
1139
1140 for (;;) {
1141 vm_page_t m;
1142
1143 if (delayed_unlock == 0)
1144 vm_page_lock_queues();
1145
1146 active_burst_count = vm_page_active_count;
1147
1148 if (active_burst_count > vm_pageout_burst_active_throttle)
1149 active_burst_count = vm_pageout_burst_active_throttle;
1150
1151 /*
1152 * Move pages from active to inactive.
1153 */
1154 while ((need_internal_inactive ||
1155 vm_page_inactive_count < vm_page_inactive_target) &&
1156 !queue_empty(&vm_page_queue_active) &&
1157 ((active_burst_count--) > 0)) {
1158
1159 vm_pageout_active++;
1160
1161 m = (vm_page_t) queue_first(&vm_page_queue_active);
1162
1163 assert(m->active && !m->inactive);
1164 assert(!m->laundry);
1165 assert(m->object != kernel_object);
1166
1167 /*
1168 * Try to lock object; since we've already got the
1169 * page queues lock, we can only 'try' for this one.
1170 * if the 'try' fails, we need to do a mutex_pause
1171 * to allow the owner of the object lock a chance to
1172 * run... otherwise, we're likely to trip over this
1173 * object in the same state as we work our way through
1174 * the queue... clumps of pages associated with the same
1175 * object are fairly typical on the inactive and active queues
1176 */
1177 if (m->object != object) {
1178 if (object != NULL) {
1179 vm_object_unlock(object);
1180 object = NULL;
1181 }
1182 if (!vm_object_lock_try(m->object)) {
1183 /*
1184 * move page to end of active queue and continue
1185 */
1186 queue_remove(&vm_page_queue_active, m,
1187 vm_page_t, pageq);
1188 queue_enter(&vm_page_queue_active, m,
1189 vm_page_t, pageq);
1190
1191 goto done_with_activepage;
1192 }
1193 object = m->object;
1194 }
1195 /*
1196 * if the page is BUSY, then we pull it
1197 * off the active queue and leave it alone.
1198 * when BUSY is cleared, it will get stuck
1199 * back on the appropriate queue
1200 */
1201 if (m->busy) {
1202 queue_remove(&vm_page_queue_active, m,
1203 vm_page_t, pageq);
1204 m->pageq.next = NULL;
1205 m->pageq.prev = NULL;
1206
1207 if (!m->fictitious)
1208 vm_page_active_count--;
1209 m->active = FALSE;
1210
1211 goto done_with_activepage;
1212 }
1213 if (need_internal_inactive) {
1214 /*
1215 * If we're unable to make forward progress
1216 * with the current set of pages on the
1217 * inactive queue due to busy objects or
1218 * throttled pageout queues, then
1219 * move a page that is already clean
1220 * or belongs to a pageout queue that
1221 * isn't currently throttled
1222 */
1223 active_throttled = FALSE;
1224
1225 if (object->internal) {
1226 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1227 active_throttled = TRUE;
1228 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1229 active_throttled = TRUE;
1230 }
1231 if (active_throttled == TRUE) {
1232 if (!m->dirty) {
1233 refmod_state = pmap_get_refmod(m->phys_page);
1234
1235 if (refmod_state & VM_MEM_REFERENCED)
1236 m->reference = TRUE;
1237 if (refmod_state & VM_MEM_MODIFIED)
1238 m->dirty = TRUE;
1239 }
1240 if (m->dirty || m->precious) {
1241 /*
1242 * page is dirty and targets a THROTTLED queue
1243 * so all we can do is move it back to the
1244 * end of the active queue to get it out
1245 * of the way
1246 */
1247 queue_remove(&vm_page_queue_active, m,
1248 vm_page_t, pageq);
1249 queue_enter(&vm_page_queue_active, m,
1250 vm_page_t, pageq);
1251
1252 vm_pageout_scan_active_throttled++;
1253
1254 goto done_with_activepage;
1255 }
1256 }
1257 vm_pageout_scan_active_throttle_success++;
1258 need_internal_inactive--;
1259 }
1260 /*
1261 * Deactivate the page while holding the object
1262 * locked, so we know the page is still not busy.
1263 * This should prevent races between pmap_enter
1264 * and pmap_clear_reference. The page might be
1265 * absent or fictitious, but vm_page_deactivate
1266 * can handle that.
1267 */
1268 vm_page_deactivate(m);
1269 done_with_activepage:
1270 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1271
1272 if (object != NULL) {
1273 vm_object_unlock(object);
1274 object = NULL;
1275 }
1276 if (local_freeq) {
1277 vm_page_free_list(local_freeq);
1278
1279 local_freeq = 0;
1280 local_freed = 0;
1281 }
1282 delayed_unlock = 0;
1283 vm_page_unlock_queues();
1284
1285 mutex_pause();
1286 vm_page_lock_queues();
1287 /*
1288 * continue the while loop processing
1289 * the active queue... need to hold
1290 * the page queues lock
1291 */
1292 continue;
1293 }
1294 }
1295
1296
1297
1298 /**********************************************************************
1299 * above this point we're playing with the active queue
1300 * below this point we're playing with the throttling mechanisms
1301 * and the inactive queue
1302 **********************************************************************/
1303
1304
1305
1306 /*
1307 * We are done if we have met our target *and*
1308 * nobody is still waiting for a page.
1309 */
1310 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1311 if (object != NULL) {
1312 vm_object_unlock(object);
1313 object = NULL;
1314 }
1315 if (local_freeq) {
1316 vm_page_free_list(local_freeq);
1317
1318 local_freeq = 0;
1319 local_freed = 0;
1320 }
1321 mutex_lock(&vm_page_queue_free_lock);
1322
1323 if ((vm_page_free_count >= vm_page_free_target) &&
1324 (vm_page_free_wanted == 0)) {
1325
1326 vm_page_unlock_queues();
1327
1328 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1329 return;
1330 }
1331 mutex_unlock(&vm_page_queue_free_lock);
1332 }
1333
1334
1335 /*
1336 * Sometimes we have to pause:
1337 * 1) No inactive pages - nothing to do.
1338 * 2) Flow control - default pageout queue is full
1339 * 3) Loop control - no acceptable pages found on the inactive queue
1340 * within the last vm_pageout_burst_inactive_throttle iterations
1341 */
1342 if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
1343 vm_pageout_scan_empty_throttle++;
1344 msecs = vm_pageout_empty_wait;
1345 goto vm_pageout_scan_delay;
1346
1347 } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
1348 vm_pageout_scan_burst_throttle++;
1349 msecs = vm_pageout_burst_wait;
1350 goto vm_pageout_scan_delay;
1351
1352 } else if (VM_PAGE_Q_THROTTLED(iq)) {
1353
1354 switch (flow_control.state) {
1355
1356 case FCS_IDLE:
1357 reset_deadlock_timer:
1358 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1359 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1360 sysclk_gettime(&flow_control.ts);
1361 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1362
1363 flow_control.state = FCS_DELAYED;
1364 msecs = vm_pageout_deadlock_wait;
1365
1366 break;
1367
1368 case FCS_DELAYED:
1369 sysclk_gettime(&ts);
1370
1371 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1372 /*
1373 * the pageout thread for the default pager is potentially
1374 * deadlocked since the
1375 * default pager queue has been throttled for more than the
1376 * allowable time... we need to move some clean pages or dirty
1377 * pages belonging to the external pagers if they aren't throttled
1378 * vm_page_free_wanted represents the number of threads currently
1379 * blocked waiting for pages... we'll move one page for each of
1380 * these plus a fixed amount to break the logjam... once we're done
1381 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1382 * with a new timeout target since we have no way of knowing
1383 * whether we've broken the deadlock except through observation
1384 * of the queue associated with the default pager... we need to
1385 * stop moving pages and allow the system to run to see what
1386 * state it settles into.
1387 */
1388 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
1389 vm_pageout_scan_deadlock_detected++;
1390 flow_control.state = FCS_DEADLOCK_DETECTED;
1391
1392 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1393 goto consider_inactive;
1394 }
1395 /*
1396 * just resniff instead of trying
1397 * to compute a new delay time... we're going to be
1398 * awakened immediately upon a laundry completion,
1399 * so we won't wait any longer than necessary
1400 */
1401 msecs = vm_pageout_idle_wait;
1402 break;
1403
1404 case FCS_DEADLOCK_DETECTED:
1405 if (vm_pageout_deadlock_target)
1406 goto consider_inactive;
1407 goto reset_deadlock_timer;
1408
1409 }
1410 vm_pageout_scan_throttle++;
1411 iq->pgo_throttled = TRUE;
1412 vm_pageout_scan_delay:
1413 if (object != NULL) {
1414 vm_object_unlock(object);
1415 object = NULL;
1416 }
1417 if (local_freeq) {
1418 vm_page_free_list(local_freeq);
1419
1420 local_freeq = 0;
1421 local_freed = 0;
1422 }
1423 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1424
1425 counter(c_vm_pageout_scan_block++);
1426
1427 vm_page_unlock_queues();
1428
1429 thread_block(THREAD_CONTINUE_NULL);
1430
1431 vm_page_lock_queues();
1432 delayed_unlock = 1;
1433
1434 iq->pgo_throttled = FALSE;
1435
1436 if (loop_count >= vm_page_inactive_count) {
1437 if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
1438 /*
1439 * Make sure we move enough "appropriate"
1440 * pages to the inactive queue before trying
1441 * again.
1442 */
1443 need_internal_inactive = vm_pageout_inactive_relief;
1444 }
1445 loop_count = 0;
1446 }
1447 inactive_burst_count = 0;
1448
1449 goto Restart;
1450 /*NOTREACHED*/
1451 }
1452
1453
1454 flow_control.state = FCS_IDLE;
1455 consider_inactive:
1456 loop_count++;
1457 inactive_burst_count++;
1458 vm_pageout_inactive++;
1459
1460 if (!queue_empty(&vm_page_queue_inactive)) {
1461 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1462
1463 if (m->clustered && (m->no_isync == TRUE)) {
1464 goto use_this_page;
1465 }
1466 }
1467 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1468 vm_zf_iterator = 0;
1469 } else {
1470 last_page_zf = 0;
1471 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1472 vm_zf_iterator = 0;
1473 }
1474 }
1475 if (queue_empty(&vm_page_queue_zf) ||
1476 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1477 !queue_empty(&vm_page_queue_inactive))) {
1478 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1479 last_page_zf = 0;
1480 } else {
1481 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1482 last_page_zf = 1;
1483 }
1484 use_this_page:
1485 assert(!m->active && m->inactive);
1486 assert(!m->laundry);
1487 assert(m->object != kernel_object);
1488
1489 /*
1490 * Try to lock object; since we've already got the
1491 * page queues lock, we can only 'try' for this one.
1492 * if the 'try' fails, we need to do a mutex_pause
1493 * to allow the owner of the object lock a chance to
1494 * run... otherwise, we're likely to trip over this
1495 * object in the same state as we work our way through
1496 * the queue... clumps of pages associated with the same
1497 * object are fairly typical on the inactive and active queues
1498 */
1499 if (m->object != object) {
1500 if (object != NULL) {
1501 vm_object_unlock(object);
1502 object = NULL;
1503 }
1504 if (!vm_object_lock_try(m->object)) {
1505 /*
1506 * Move page to end and continue.
1507 * Don't re-issue ticket
1508 */
1509 if (m->zero_fill) {
1510 queue_remove(&vm_page_queue_zf, m,
1511 vm_page_t, pageq);
1512 queue_enter(&vm_page_queue_zf, m,
1513 vm_page_t, pageq);
1514 } else {
1515 queue_remove(&vm_page_queue_inactive, m,
1516 vm_page_t, pageq);
1517 queue_enter(&vm_page_queue_inactive, m,
1518 vm_page_t, pageq);
1519 }
1520 vm_pageout_inactive_nolock++;
1521
1522 /*
1523 * force us to dump any collected free pages
1524 * and to pause before moving on
1525 */
1526 delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;
1527
1528 goto done_with_inactivepage;
1529 }
1530 object = m->object;
1531 }
1532 /*
1533 * If the page belongs to a purgable object with no pending copies
1534 * against it, then we reap all of the pages in the object
1535 * and note that the object has been "emptied". It'll be up to the
1536 * application to discover this and recreate its contents if desired.
1537 */
1538 if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
1539 object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
1540 object->copy == VM_OBJECT_NULL) {
1541
1542 (void) vm_object_purge(object);
1543 vm_pageout_purged_objects++;
1544 /*
1545 * we've just taken all of the pages from this object,
1546 * so drop the lock now since we're not going to find
1547 * any more pages belonging to it anytime soon
1548 */
1549 vm_object_unlock(object);
1550 object = NULL;
1551
1552 inactive_burst_count = 0;
1553
1554 goto done_with_inactivepage;
1555 }
1556
1557 /*
1558 * Paging out pages of external objects which
1559 * are currently being created must be avoided.
1560 * The pager may claim memory, thus leading to a
1561 * possible deadlock between it and the pageout thread,
1562 * if such pages are finally chosen. The remaining assumption
1563 * is that there will finally be enough available pages in the
1564 * inactive pool to page out in order to satisfy all memory
1565 * claimed by the thread which concurrently creates the pager.
1566 */
1567 if (!object->pager_initialized && object->pager_created) {
1568 /*
1569 * Move page to end and continue, hoping that
1570 * there will be enough other inactive pages to
1571 * page out so that the thread which currently
1572 * initializes the pager will succeed.
1573 * Don't re-grant the ticket; the page should be
1574 * pulled from the queue and paged out whenever
1575 * one of its logically adjacent fellows is
1576 * targeted.
1577 */
1578 if (m->zero_fill) {
1579 queue_remove(&vm_page_queue_zf, m,
1580 vm_page_t, pageq);
1581 queue_enter(&vm_page_queue_zf, m,
1582 vm_page_t, pageq);
1583 last_page_zf = 1;
1584 vm_zf_iterator = vm_zf_iterator_count - 1;
1585 } else {
1586 queue_remove(&vm_page_queue_inactive, m,
1587 vm_page_t, pageq);
1588 queue_enter(&vm_page_queue_inactive, m,
1589 vm_page_t, pageq);
1590 last_page_zf = 0;
1591 vm_zf_iterator = 1;
1592 }
1593 vm_pageout_inactive_avoid++;
1594
1595 goto done_with_inactivepage;
1596 }
1597 /*
1598 * Remove the page from the inactive list.
1599 */
1600 if (m->zero_fill) {
1601 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1602 } else {
1603 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1604 }
1605 m->pageq.next = NULL;
1606 m->pageq.prev = NULL;
1607 m->inactive = FALSE;
1608 if (!m->fictitious)
1609 vm_page_inactive_count--;
1610
1611 if (m->busy || !object->alive) {
1612 /*
1613 * Somebody is already playing with this page.
1614 * Leave it off the pageout queues.
1615 */
1616 vm_pageout_inactive_busy++;
1617
1618 goto done_with_inactivepage;
1619 }
1620
1621 /*
1622 * If it's absent or in error, we can reclaim the page.
1623 */
1624
1625 if (m->absent || m->error) {
1626 vm_pageout_inactive_absent++;
1627 reclaim_page:
1628 if (vm_pageout_deadlock_target) {
1629 vm_pageout_scan_inactive_throttle_success++;
1630 vm_pageout_deadlock_target--;
1631 }
1632 if (m->tabled)
1633 vm_page_remove(m); /* clears tabled, object, offset */
1634 if (m->absent)
1635 vm_object_absent_release(object);
1636
1637 assert(m->pageq.next == NULL &&
1638 m->pageq.prev == NULL);
1639 m->pageq.next = (queue_entry_t)local_freeq;
1640 local_freeq = m;
1641 local_freed++;
1642
1643 inactive_burst_count = 0;
1644
1645 goto done_with_inactivepage;
1646 }
1647
1648 assert(!m->private);
1649 assert(!m->fictitious);
1650
1651 /*
1652 * If already cleaning this page in place, convert from
1653 * "adjacent" to "target". We can leave the page mapped,
1654 * and vm_pageout_object_terminate will determine whether
1655 * to free or reactivate.
1656 */
1657
1658 if (m->cleaning) {
1659 m->busy = TRUE;
1660 m->pageout = TRUE;
1661 m->dump_cleaning = TRUE;
1662 vm_page_wire(m);
1663
1664 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1665
1666 inactive_burst_count = 0;
1667
1668 goto done_with_inactivepage;
1669 }
1670
1671 /*
1672 * If it's being used, reactivate.
1673 * (Fictitious pages are either busy or absent.)
1674 */
1675 if ( (!m->reference) ) {
1676 refmod_state = pmap_get_refmod(m->phys_page);
1677
1678 if (refmod_state & VM_MEM_REFERENCED)
1679 m->reference = TRUE;
1680 if (refmod_state & VM_MEM_MODIFIED)
1681 m->dirty = TRUE;
1682 }
1683 if (m->reference) {
1684 was_referenced:
1685 vm_page_activate(m);
1686 VM_STAT(reactivations++);
1687
1688 vm_pageout_inactive_used++;
1689 last_page_zf = 0;
1690 inactive_burst_count = 0;
1691
1692 goto done_with_inactivepage;
1693 }
1694
1695 XPR(XPR_VM_PAGEOUT,
1696 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1697 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1698
1699 /*
1700 * we've got a candidate page to steal...
1701 *
1702 * m->dirty is up to date courtesy of the
1703 * preceding check for m->reference... if
1704 * we get here, then m->reference had to be
1705 * FALSE which means we did a pmap_get_refmod
1706 * and updated both m->reference and m->dirty
1707 *
1708 * if it's dirty or precious we need to
1709 * see if the target queue is throttled
1710 * if it is, we need to skip over it by moving it back
1711 * to the end of the inactive queue
1712 */
1713 inactive_throttled = FALSE;
1714
1715 if (m->dirty || m->precious) {
1716 if (object->internal) {
1717 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1718 inactive_throttled = TRUE;
1719 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1720 inactive_throttled = TRUE;
1721 }
1722 }
1723 if (inactive_throttled == TRUE) {
1724 if (m->zero_fill) {
1725 queue_enter(&vm_page_queue_zf, m,
1726 vm_page_t, pageq);
1727 } else {
1728 queue_enter(&vm_page_queue_inactive, m,
1729 vm_page_t, pageq);
1730 }
1731 if (!m->fictitious)
1732 vm_page_inactive_count++;
1733 m->inactive = TRUE;
1734
1735 vm_pageout_scan_inactive_throttled++;
1736
1737 goto done_with_inactivepage;
1738 }
1739 /*
1740 * we've got a page that we can steal...
1741 * eliminate all mappings and make sure
1742 * we have the up-to-date modified state
1743 * first take the page BUSY, so that no new
1744 * mappings can be made
1745 */
1746 m->busy = TRUE;
1747
1748 /*
1749 * if we need to do a pmap_disconnect then we
1750 * need to re-evaluate m->dirty since the pmap_disconnect
1751 * provides the true state atomically... the
1752 * page was still mapped up to the pmap_disconnect
1753 * and may have been dirtied at the last microsecond
1754 *
1755 * we also check for the page being referenced 'late'
1756 * if it was, we first need to do a WAKEUP_DONE on it
1757 * since we already set m->busy = TRUE, before
1758 * going off to reactivate it
1759 *
1760 * if we don't need the pmap_disconnect, then
1761 * m->dirty is up to date courtesy of the
1762 * earlier check for m->reference... if
1763 * we get here, then m->reference had to be
1764 * FALSE which means we did a pmap_get_refmod
1765 * and updated both m->reference and m->dirty...
1766 */
1767 if (m->no_isync == FALSE) {
1768 refmod_state = pmap_disconnect(m->phys_page);
1769
1770 if (refmod_state & VM_MEM_MODIFIED)
1771 m->dirty = TRUE;
1772 if (refmod_state & VM_MEM_REFERENCED) {
1773 m->reference = TRUE;
1774
1775 PAGE_WAKEUP_DONE(m);
1776 goto was_referenced;
1777 }
1778 }
1779 /*
1780 * If it's clean and not precious, we can free the page.
1781 */
1782 if (!m->dirty && !m->precious) {
1783 vm_pageout_inactive_clean++;
1784 goto reclaim_page;
1785 }
1786 vm_pageout_cluster(m);
1787
1788 vm_pageout_inactive_dirty++;
1789
1790 inactive_burst_count = 0;
1791
1792 done_with_inactivepage:
1793 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1794
1795 if (object != NULL) {
1796 vm_object_unlock(object);
1797 object = NULL;
1798 }
1799 if (local_freeq) {
1800 vm_page_free_list(local_freeq);
1801
1802 local_freeq = 0;
1803 local_freed = 0;
1804 }
1805 delayed_unlock = 0;
1806 vm_page_unlock_queues();
1807 mutex_pause();
1808 }
1809 /*
1810 * back to top of pageout scan loop
1811 */
1812 }
1813 }
1814
1815
1816 int vm_page_free_count_init;
1817
1818 void
1819 vm_page_free_reserve(
1820 int pages)
1821 {
1822 int free_after_reserve;
1823
1824 vm_page_free_reserved += pages;
1825
1826 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1827
1828 vm_page_free_min = vm_page_free_reserved +
1829 VM_PAGE_FREE_MIN(free_after_reserve);
1830
1831 vm_page_free_target = vm_page_free_reserved +
1832 VM_PAGE_FREE_TARGET(free_after_reserve);
1833
1834 if (vm_page_free_target < vm_page_free_min + 5)
1835 vm_page_free_target = vm_page_free_min + 5;
1836 }
1837
1838 /*
1839 * vm_pageout is the high level pageout daemon.
1840 */
1841
1842 void
1843 vm_pageout_continue(void)
1844 {
1845 vm_pageout_scan_event_counter++;
1846 vm_pageout_scan();
1847 /* we hold vm_page_queue_free_lock now */
1848 assert(vm_page_free_wanted == 0);
1849 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1850 mutex_unlock(&vm_page_queue_free_lock);
1851
1852 counter(c_vm_pageout_block++);
1853 thread_block((thread_continue_t)vm_pageout_continue);
1854 /*NOTREACHED*/
1855 }
1856
1857
1858 /*
1859 * must be called with the
1860 * queues and object locks held
1861 */
1862 static void
1863 vm_pageout_queue_steal(vm_page_t m)
1864 {
1865 struct vm_pageout_queue *q;
1866
1867 if (m->object->internal == TRUE)
1868 q = &vm_pageout_queue_internal;
1869 else
1870 q = &vm_pageout_queue_external;
1871
1872 m->laundry = FALSE;
1873 m->pageout_queue = FALSE;
1874 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
1875
1876 m->pageq.next = NULL;
1877 m->pageq.prev = NULL;
1878
1879 vm_object_paging_end(m->object);
1880
1881 q->pgo_laundry--;
1882 }
1883
1884
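/*
 * FAKE_DEADLOCK: debug aid... every FAKE_COUNT pages pushed by the
 * internal pageout thread, grab and release a kernel allocation
 * sized to exhaust the free pool, so that the pageout deadlock
 * handling can be exercised
 */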
1885 #ifdef FAKE_DEADLOCK
1886
1887 #define FAKE_COUNT 5000
1888
1889 int internal_count = 0;
1890 int fake_deadlock = 0;
1891
1892 #endif
1893
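/*
 * vm_pageout_iothread_continue:
 *
 * service loop for a pageout I/O thread... pull pages off the given
 * pageout queue, create a pager for the backing object if it doesn't
 * have one yet, and push each page to its pager via
 * memory_object_data_return; when the queue drains, go idle and
 * block waiting for more work
 */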
1894 static void
1895 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
1896 {
1897 vm_page_t m = NULL;
1898 vm_object_t object;
1899 boolean_t need_wakeup;
1900
1901 vm_page_lock_queues();
1902
1903 while ( !queue_empty(&q->pgo_pending) ) {
1904
1905 q->pgo_busy = TRUE;
1906 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
1907 m->pageout_queue = FALSE;
1908 vm_page_unlock_queues();
1909
1910 m->pageq.next = NULL;
1911 m->pageq.prev = NULL;
1912 #ifdef FAKE_DEADLOCK
1913 if (q == &vm_pageout_queue_internal) {
1914 vm_offset_t addr;
1915 int pg_count;
1916
1917 internal_count++;
1918
1919 if (internal_count == FAKE_COUNT) {
1920
1921 pg_count = vm_page_free_count + vm_page_free_reserved;
1922
1923 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
1924 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
1925 }
1926 internal_count = 0;
1927 fake_deadlock++;
1928 }
1929 }
1930 #endif
1931 object = m->object;
1932
1933 if (!object->pager_initialized) {
1934 vm_object_lock(object);
1935
1936 /*
1937 * If there is no memory object for the page, create
1938 * one and hand it to the default pager.
1939 */
1940
1941 if (!object->pager_initialized)
1942 vm_object_collapse(object, (vm_object_offset_t)0);
1943 if (!object->pager_initialized)
1944 vm_object_pager_create(object);
1945 if (!object->pager_initialized) {
1946 /*
1947 * Still no pager for the object.
1948 * Reactivate the page.
1949 *
1950 * Should only happen if there is no
1951 * default pager.
1952 */
1953 m->list_req_pending = FALSE;
1954 m->cleaning = FALSE;
1955 m->pageout = FALSE;
1956 vm_page_unwire(m);
1957
1958 vm_pageout_throttle_up(m);
1959
1960 vm_page_lock_queues();
1961 vm_pageout_dirty_no_pager++;
1962 vm_page_activate(m);
1963 vm_page_unlock_queues();
1964
1965 /*
1966 * And we are done with it.
1967 */
1968 PAGE_WAKEUP_DONE(m);
1969
1970 vm_object_paging_end(object);
1971 vm_object_unlock(object);
1972
1973 vm_page_lock_queues();
1974 continue;
1975 } else if (object->pager == MEMORY_OBJECT_NULL) {
1976 /*
1977 * This pager has been destroyed by either
1978 * memory_object_destroy or vm_object_destroy, and
1979 * so there is nowhere for the page to go.
1980 * Just free the page... VM_PAGE_FREE takes
1981 * care of cleaning up all the state...
1982 * including doing the vm_pageout_throttle_up
1983 */
1984 VM_PAGE_FREE(m);
1985
1986 vm_object_paging_end(object);
1987 vm_object_unlock(object);
1988
1989 vm_page_lock_queues();
1990 continue;
1991 }
1992 vm_object_unlock(object);
1993 }
1994 /*
1995 * we expect the paging_in_progress reference to have
1996 * already been taken on the object before it was added
1997 * to the appropriate pageout I/O queue... this will
1998 * keep the object from being terminated and/or the
1999 * paging_offset from changing until the I/O has
2000 * completed... therefore no need to lock the object to
2001 * pull the paging_offset from it.
2002 *
2003 * Send the data to the pager.
2004 * any pageout clustering happens there
2005 */
2006 memory_object_data_return(object->pager,
2007 m->offset + object->paging_offset,
2008 PAGE_SIZE,
2009 NULL,
2010 NULL,
2011 FALSE,
2012 FALSE,
2013 0);
2014
2015 vm_object_lock(object);
2016 vm_object_paging_end(object);
2017 vm_object_unlock(object);
2018
2019 vm_page_lock_queues();
2020 }
2021 assert_wait((event_t) q, THREAD_UNINT);
2022
2023
2024 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2025 q->pgo_throttled = FALSE;
2026 need_wakeup = TRUE;
2027 } else
2028 need_wakeup = FALSE;
2029
2030 q->pgo_busy = FALSE;
2031 q->pgo_idle = TRUE;
2032 vm_page_unlock_queues();
2033
2034 if (need_wakeup == TRUE)
2035 thread_wakeup((event_t) &q->pgo_laundry);
2036
2037 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2038 /*NOTREACHED*/
2039 }
2040
2041
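/*
 * continuation for the external (file-backed) pageout I/O thread
 */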
2042 static void
2043 vm_pageout_iothread_external(void)
2044 {
2045
2046 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2047 /*NOTREACHED*/
2048 }
2049
2050
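/*
 * continuation for the internal (anonymous) pageout I/O thread...
 * sets TH_OPT_VMPRIV (VM-privileged) on itself before entering the
 * common service loop
 */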
2051 static void
2052 vm_pageout_iothread_internal(void)
2053 {
2054 thread_t self = current_thread();
2055
2056 self->options |= TH_OPT_VMPRIV;
2057
2058 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2059 /*NOTREACHED*/
2060 }
2061
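/*
 * vm_pageout_garbage_collect:
 *
 * reclaim kernel metadata... collect unused kernel stacks, let the
 * machine layer and the zone allocator give back memory, then block
 * until woken via &vm_pageout_garbage_collect
 */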
2062 static void
2063 vm_pageout_garbage_collect(int collect)
2064 {
2065 if (collect) {
2066 stack_collect();
2067
2068 /*
2069 * consider_zone_gc should be last, because the other operations
2070 * might return memory to zones.
2071 */
2072 consider_machine_collect();
2073 consider_zone_gc();
2074
2075 consider_machine_adjust();
2076 }
2077
2078 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2079
2080 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2081 /*NOTREACHED*/
2082 }
2083
2084
2085
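/*
 * vm_pageout:
 *
 * startup for the pageout daemon... raise our scheduling priority,
 * initialize the pageout tunables and the internal/external I/O
 * queues, spawn the I/O and garbage-collection threads, then fall
 * into vm_pageout_continue and never return
 */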
2086 void
2087 vm_pageout(void)
2088 {
2089 thread_t self = current_thread();
2090 thread_t thread;
2091 kern_return_t result;
2092 spl_t s;
2093
2094 /*
2095 * Set thread privileges.
2096 */
2097 s = splsched();
2098 thread_lock(self);
2099 self->priority = BASEPRI_PREEMPT - 1;
2100 set_sched_pri(self, self->priority);
2101 thread_unlock(self);
2102 splx(s);
2103
2104 /*
2105 * Initialize some paging parameters.
2106 */
2107
2108 if (vm_pageout_idle_wait == 0)
2109 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2110
2111 if (vm_pageout_burst_wait == 0)
2112 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2113
2114 if (vm_pageout_empty_wait == 0)
2115 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2116
2117 if (vm_pageout_deadlock_wait == 0)
2118 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2119
2120 if (vm_pageout_deadlock_relief == 0)
2121 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2122
2123 if (vm_pageout_inactive_relief == 0)
2124 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2125
2126 if (vm_pageout_burst_active_throttle == 0)
2127 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2128
2129 if (vm_pageout_burst_inactive_throttle == 0)
2130 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2131
2132 /*
2133 * Set kernel task to low backing store privileged
2134 * status
2135 */
2136 task_lock(kernel_task);
2137 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2138 task_unlock(kernel_task);
2139
2140 vm_page_free_count_init = vm_page_free_count;
2141 vm_zf_iterator = 0;
2142 /*
2143 * even if we've already called vm_page_free_reserve,
2144 * call it again here to ensure that the targets are
2145 * accurately calculated (it uses vm_page_free_count_init)...
2146 * calling it with an arg of 0 will not change the reserve
2147 * but will re-calculate free_min and free_target
2148 */
2149 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2150 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2151 } else
2152 vm_page_free_reserve(0);
2153
2154
2155 queue_init(&vm_pageout_queue_external.pgo_pending);
2156 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2157 vm_pageout_queue_external.pgo_laundry = 0;
2158 vm_pageout_queue_external.pgo_idle = FALSE;
2159 vm_pageout_queue_external.pgo_busy = FALSE;
2160 vm_pageout_queue_external.pgo_throttled = FALSE;
2161
2162 queue_init(&vm_pageout_queue_internal.pgo_pending);
2163 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2164 vm_pageout_queue_internal.pgo_laundry = 0;
2165 vm_pageout_queue_internal.pgo_idle = FALSE;
2166 vm_pageout_queue_internal.pgo_busy = FALSE;
2167 vm_pageout_queue_internal.pgo_throttled = FALSE;
2168
2169
2170 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
2171 if (result != KERN_SUCCESS)
2172 panic("vm_pageout_iothread_internal: create failed");
2173
2174 thread_deallocate(thread);
2175
2176
2177 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
2178 if (result != KERN_SUCCESS)
2179 panic("vm_pageout_iothread_external: create failed");
2180
2181 thread_deallocate(thread);
2182
2183
2184 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
2185 if (result != KERN_SUCCESS)
2186 panic("vm_pageout_garbage_collect: create failed");
2187
2188 thread_deallocate(thread);
2189
2190 vm_object_reaper_init();
2191
2192
2193 vm_pageout_continue();
2194 /*NOTREACHED*/
2195 }
2196
2197
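/*
 * upl_create:
 *
 * allocate and initialize a upl... UPL_CREATE_INTERNAL appends an
 * inline upl_page_info array for (size / PAGE_SIZE) pages, and
 * UPL_CREATE_LITE appends a one-bit-per-page map rounded up to a
 * 4-byte boundary
 */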
2198 static upl_t
2199 upl_create(
2200 int flags,
2201 upl_size_t size)
2202 {
2203 upl_t upl;
2204 int page_field_size; /* bit field in word size buf */
2205
2206 page_field_size = 0;
2207 if (flags & UPL_CREATE_LITE) {
2208 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2209 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2210 }
2211 if(flags & UPL_CREATE_INTERNAL) {
2212 upl = (upl_t)kalloc(sizeof(struct upl)
2213 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2214 + page_field_size);
2215 } else {
2216 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
2217 }
2218 upl->flags = 0;
2219 upl->src_object = NULL;
2220 upl->kaddr = (vm_offset_t)0;
2221 upl->size = 0;
2222 upl->map_object = NULL;
2223 upl->ref_count = 1;
2224 upl_lock_init(upl);
2225 #ifdef UPL_DEBUG
2226 upl->ubc_alias1 = 0;
2227 upl->ubc_alias2 = 0;
2228 #endif /* UPL_DEBUG */
2229 return(upl);
2230 }
2231
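/*
 * upl_destroy:
 *
 * final teardown of a upl... unlink it from its object's uplq when
 * UPL_DEBUG is on, drop the reference on the map_object when a
 * pageout (shadow) object was inserted, and free the upl together
 * with any inline page list and lite map
 */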
2232 static void
2233 upl_destroy(
2234 upl_t upl)
2235 {
2236 int page_field_size; /* bit field in word size buf */
2237
2238 #ifdef UPL_DEBUG
2239 {
2240 upl_t upl_ele;
2241 vm_object_t object;
2242 if (upl->map_object->pageout) {
2243 object = upl->map_object->shadow;
2244 } else {
2245 object = upl->map_object;
2246 }
2247 vm_object_lock(object);
2248 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
2249 if(upl_ele == upl) {
2250 queue_remove(&object->uplq,
2251 upl_ele, upl_t, uplq);
2252 break;
2253 }
2254 }
2255 vm_object_unlock(object);
2256 }
2257 #endif /* UPL_DEBUG */
2258 /* drop a reference on the map_object whether or */
2259 /* not a pageout object is inserted */
2260 if(upl->map_object->pageout)
2261 vm_object_deallocate(upl->map_object);
2262
2263 page_field_size = 0;
2264 if (upl->flags & UPL_LITE) {
2265 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2266 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2267 }
2268 if(upl->flags & UPL_INTERNAL) {
2269 kfree(upl,
2270 sizeof(struct upl) +
2271 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2272 + page_field_size);
2273 } else {
2274 kfree(upl, sizeof(struct upl) + page_field_size);
2275 }
2276 }
2277
2278 void uc_upl_dealloc(upl_t upl);
2279 __private_extern__ void
2280 uc_upl_dealloc(
2281 upl_t upl)
2282 {
2283 upl->ref_count -= 1;
2284 if(upl->ref_count == 0) {
2285 upl_destroy(upl);
2286 }
2287 }
2288
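/*
 * drop a caller reference on the upl and destroy it when the last
 * reference goes away
 */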
2289 void
2290 upl_deallocate(
2291 upl_t upl)
2292 {
2293
2294 upl->ref_count -= 1;
2295 if(upl->ref_count == 0) {
2296 upl_destroy(upl);
2297 }
2298 }
2299
2300 /*
2301 * Statistics about UPL enforcement of copy-on-write obligations.
2302 */
2303 unsigned long upl_cow = 0;
2304 unsigned long upl_cow_again = 0;
2305 unsigned long upl_cow_contiguous = 0;
2306 unsigned long upl_cow_pages = 0;
2307 unsigned long upl_cow_again_pages = 0;
2308 unsigned long upl_cow_contiguous_pages = 0;
2309
2310 /*
2311 * Routine: vm_object_upl_request
2312 * Purpose:
2313 * Cause the population of a portion of a vm_object.
2314 * Depending on the nature of the request, the pages
2315 * returned may contain valid data or be uninitialized.
2316 * A page list structure, listing the physical pages,
2317 * will be returned upon request.
2318 * This function is called by the file system or any other
2319 * supplier of backing store to a pager.
2320 * IMPORTANT NOTE: The caller must still respect the relationship
2321 * between the vm_object and its backing memory object. The
2322 * caller MUST NOT substitute changes in the backing file
2323 * without first doing a memory_object_lock_request on the
2324 * target range unless it is known that the pages are not
2325 * shared with another entity at the pager level.
2326 * Copy_in_to:
2327 * if a page list structure is present
2328 * return the mapped physical pages, where a
2329 * page is not present, return a non-initialized
2330 * one. If the no_sync bit is turned on, don't
2331 * call the pager unlock to synchronize with other
2332 * possible copies of the page. Leave pages busy
2333 * in the original object, if a page list structure
2334 * was specified. When a commit of the page list
2335 * pages is done, the dirty bit will be set for each one.
2336 * Copy_out_from:
2337 * If a page list structure is present, return
2338 * all mapped pages. Where a page does not exist
2339 * map a zero filled one. Leave pages busy in
2340 * the original object. If a page list structure
2341 * is not specified, this call is a no-op.
2342 *
2343 * Note: access of default pager objects has a rather interesting
2344 * twist. The caller of this routine, presumably the file system
2345 * page cache handling code, will never actually make a request
2346 * against a default pager backed object. Only the default
2347 * pager will make requests on backing store related vm_objects.
2348 * In this way the default pager can maintain the relationship
2349 * between backing store files (abstract memory objects) and
2350 * the vm_objects (cache objects) they support.
2351 *
2352 */
2353
2354 __private_extern__ kern_return_t
2355 vm_object_upl_request(
2356 vm_object_t object,
2357 vm_object_offset_t offset,
2358 upl_size_t size,
2359 upl_t *upl_ptr,
2360 upl_page_info_array_t user_page_list,
2361 unsigned int *page_list_count,
2362 int cntrl_flags)
2363 {
2364 vm_page_t dst_page = VM_PAGE_NULL;
2365 vm_object_offset_t dst_offset = offset;
2366 upl_size_t xfer_size = size;
2367 boolean_t do_m_lock = FALSE;
2368 boolean_t dirty;
2369 boolean_t hw_dirty;
2370 upl_t upl = NULL;
2371 unsigned int entry;
2372 #if MACH_CLUSTER_STATS
2373 boolean_t encountered_lrp = FALSE;
2374 #endif
2375 vm_page_t alias_page = NULL;
2376 int page_ticket;
2377 int refmod_state;
2378 wpl_array_t lite_list = NULL;
2379 vm_object_t last_copy_object;
2380
2381
2382 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2383 /*
2384 * For forward compatibility's sake,
2385 * reject any unknown flag.
2386 */
2387 return KERN_INVALID_VALUE;
2388 }
2389
2390 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2391 >> UPL_PAGE_TICKET_SHIFT;
2392
2393 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2394 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2395 }
2396
2397 if(cntrl_flags & UPL_SET_INTERNAL)
2398 if(page_list_count != NULL)
2399 *page_list_count = MAX_UPL_TRANSFER;
2400
2401 if((!object->internal) && (object->paging_offset != 0))
2402 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2403
2404 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2405 return KERN_SUCCESS;
2406 }
2407
2408 vm_object_lock(object);
2409 vm_object_paging_begin(object);
2410 vm_object_unlock(object);
2411
2412 if(upl_ptr) {
2413 if(cntrl_flags & UPL_SET_INTERNAL) {
2414 if(cntrl_flags & UPL_SET_LITE) {
2415 uintptr_t page_field_size;
2416 upl = upl_create(
2417 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2418 size);
2419 user_page_list = (upl_page_info_t *)
2420 (((uintptr_t)upl) + sizeof(struct upl));
2421 lite_list = (wpl_array_t)
2422 (((uintptr_t)user_page_list) +
2423 ((size/PAGE_SIZE) *
2424 sizeof(upl_page_info_t)));
2425 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2426 page_field_size =
2427 (page_field_size + 3) & 0xFFFFFFFC;
2428 bzero((char *)lite_list, page_field_size);
2429 upl->flags =
2430 UPL_LITE | UPL_INTERNAL;
2431 } else {
2432 upl = upl_create(UPL_CREATE_INTERNAL, size);
2433 user_page_list = (upl_page_info_t *)
2434 (((uintptr_t)upl) + sizeof(struct upl));
2435 upl->flags = UPL_INTERNAL;
2436 }
2437 } else {
2438 if(cntrl_flags & UPL_SET_LITE) {
2439 uintptr_t page_field_size;
2440 upl = upl_create(UPL_CREATE_LITE, size);
2441 lite_list = (wpl_array_t)
2442 (((uintptr_t)upl) + sizeof(struct upl));
2443 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2444 page_field_size =
2445 (page_field_size + 3) & 0xFFFFFFFC;
2446 bzero((char *)lite_list, page_field_size);
2447 upl->flags = UPL_LITE;
2448 } else {
2449 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2450 upl->flags = 0;
2451 }
2452 }
2453
2454 if (object->phys_contiguous) {
2455 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2456 object->copy != VM_OBJECT_NULL) {
2457 /* Honor copy-on-write obligations */
2458
2459 /*
2460 * XXX FBDP
2461 * We could still have a race...
2462 * A is here building the UPL for a write().
2463 * A pushes the pages to the current copy
2464 * object.
2465 * A returns the UPL to the caller.
2466 * B comes along and establishes another
2467 * private mapping on this object, inserting
2468 * a new copy object between the original
2469 * object and the old copy object.
2470 * B reads a page and gets the original contents
2471 * from the original object.
2472 * A modifies the page in the original object.
2473 * B reads the page again and sees A's changes,
2474 * which is wrong...
2475 *
2476 * The problem is that the pages are not
2477 * marked "busy" in the original object, so
2478 * nothing prevents B from reading it
2479 * before A's changes are completed.
2480 *
2481 * The "paging_in_progress" might protect us
2482 * from the insertion of a new copy object
2483 * though... To be verified.
2484 */
2485 vm_object_lock_request(object,
2486 offset,
2487 size,
2488 FALSE,
2489 MEMORY_OBJECT_COPY_SYNC,
2490 VM_PROT_NO_CHANGE);
2491 upl_cow_contiguous++;
2492 upl_cow_contiguous_pages += size >> PAGE_SHIFT;
2493 }
2494
2495 upl->map_object = object;
2496 /* don't need any shadow mappings for this one */
2497 /* since it is already I/O memory */
2498 upl->flags |= UPL_DEVICE_MEMORY;
2499
2500
2501 /* paging_in_progress protects paging_offset */
2502 upl->offset = offset + object->paging_offset;
2503 upl->size = size;
2504 *upl_ptr = upl;
2505 if(user_page_list) {
2506 user_page_list[0].phys_addr =
2507 (offset + object->shadow_offset)>>PAGE_SHIFT;
2508 user_page_list[0].device = TRUE;
2509 }
2510
2511 if(page_list_count != NULL) {
2512 if (upl->flags & UPL_INTERNAL) {
2513 *page_list_count = 0;
2514 } else {
2515 *page_list_count = 1;
2516 }
2517 }
2518
2519 return KERN_SUCCESS;
2520 }
2521
2522 if(user_page_list)
2523 user_page_list[0].device = FALSE;
2524
2525 if(cntrl_flags & UPL_SET_LITE) {
2526 upl->map_object = object;
2527 } else {
2528 upl->map_object = vm_object_allocate(size);
2529 /*
2530 * No need to lock the new object: nobody else knows
2531 * about it yet, so it's all ours so far.
2532 */
2533 upl->map_object->shadow = object;
2534 upl->map_object->pageout = TRUE;
2535 upl->map_object->can_persist = FALSE;
2536 upl->map_object->copy_strategy =
2537 MEMORY_OBJECT_COPY_NONE;
2538 upl->map_object->shadow_offset = offset;
2539 upl->map_object->wimg_bits = object->wimg_bits;
2540 }
2541
2542 }
2543 if (!(cntrl_flags & UPL_SET_LITE)) {
2544 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2545 }
2546
2547 /*
2548 * ENCRYPTED SWAP:
2549 * Just mark the UPL as "encrypted" here.
2550 * We'll actually encrypt the pages later,
2551 * in upl_encrypt(), when the caller has
2552 * selected which pages need to go to swap.
2553 */
2554 if (cntrl_flags & UPL_ENCRYPT) {
2555 upl->flags |= UPL_ENCRYPTED;
2556 }
2557 if (cntrl_flags & UPL_FOR_PAGEOUT) {
2558 upl->flags |= UPL_PAGEOUT;
2559 }
2560 vm_object_lock(object);
2561
2562 /* we can lock in the paging_offset once paging_in_progress is set */
2563 if(upl_ptr) {
2564 upl->size = size;
2565 upl->offset = offset + object->paging_offset;
2566 *upl_ptr = upl;
2567 #ifdef UPL_DEBUG
2568 queue_enter(&object->uplq, upl, upl_t, uplq);
2569 #endif /* UPL_DEBUG */
2570 }
2571
2572 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2573 object->copy != VM_OBJECT_NULL) {
2574 /* Honor copy-on-write obligations */
2575
2576 /*
2577 * The caller is gathering these pages and
2578 * might modify their contents. We need to
2579 * make sure that the copy object has its own
2580 * private copies of these pages before we let
2581 * the caller modify them.
2582 */
2583 vm_object_update(object,
2584 offset,
2585 size,
2586 NULL,
2587 NULL,
2588 FALSE, /* should_return */
2589 MEMORY_OBJECT_COPY_SYNC,
2590 VM_PROT_NO_CHANGE);
2591 upl_cow++;
2592 upl_cow_pages += size >> PAGE_SHIFT;
2593
2594 }
2595 /* remember which copy object we synchronized with */
2596 last_copy_object = object->copy;
2597
2598 entry = 0;
2599 if(cntrl_flags & UPL_COPYOUT_FROM) {
2600 upl->flags |= UPL_PAGE_SYNC_DONE;
2601
2602 while (xfer_size) {
2603 if((alias_page == NULL) &&
2604 !(cntrl_flags & UPL_SET_LITE)) {
2605 vm_object_unlock(object);
2606 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2607 vm_object_lock(object);
2608 }
2609 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2610 dst_page->fictitious ||
2611 dst_page->absent ||
2612 dst_page->error ||
2613 (dst_page->wire_count && !dst_page->pageout) ||
2614
2615 ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2616 (dst_page->page_ticket != page_ticket) &&
2617 ((dst_page->page_ticket+1) != page_ticket)) ) {
2618
2619 if (user_page_list)
2620 user_page_list[entry].phys_addr = 0;
2621 } else {
2622 /*
2623 * grab this up front...
2624 * a high percentage of the time we're going to
2625 * need the hardware modification state a bit later
2626 * anyway... so we can eliminate an extra call into
2627 * the pmap layer by grabbing it here and recording it
2628 */
2629 refmod_state = pmap_get_refmod(dst_page->phys_page);
2630
2631 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2632 /*
2633 * we're only asking for DIRTY pages to be returned
2634 */
2635
2636 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2637 /*
2638 * if we were the page stolen by vm_pageout_scan to be
2639 * cleaned (as opposed to a buddy being clustered in)
2640 * or this request is not being driven by a PAGEOUT cluster,
2641 * then we only need to check for the page being dirty or
2642 * precious to decide whether to return it
2643 */
2644 if (dst_page->dirty || dst_page->precious ||
2645 (refmod_state & VM_MEM_MODIFIED)) {
2646 goto check_busy;
2647 }
2648 }
2649 /*
2650 * this is a request for a PAGEOUT cluster and this page
2651 * is merely along for the ride as a 'buddy'... not only
2652 * does it have to be dirty to be returned, but it also
2653 * can't have been referenced recently... note that we've
2654 * already filtered above based on whether this page is
2655 * currently on the inactive queue or it meets the page
2656 * ticket (generation count) check
2657 */
2658 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2659 ((refmod_state & VM_MEM_MODIFIED) ||
2660 dst_page->dirty || dst_page->precious) ) {
2661 goto check_busy;
2662 }
2663 /*
2664 * if we reach here, we're not to return
2665 * the page... go on to the next one
2666 */
2667 if (user_page_list)
2668 user_page_list[entry].phys_addr = 0;
2669 entry++;
2670 dst_offset += PAGE_SIZE_64;
2671 xfer_size -= PAGE_SIZE;
2672 continue;
2673 }
2674 check_busy:
2675 if(dst_page->busy &&
2676 (!(dst_page->list_req_pending &&
2677 dst_page->pageout))) {
2678 if(cntrl_flags & UPL_NOBLOCK) {
2679 if(user_page_list) {
2680 user_page_list[entry].phys_addr = 0;
2681 }
2682 entry++;
2683 dst_offset += PAGE_SIZE_64;
2684 xfer_size -= PAGE_SIZE;
2685 continue;
2686 }
2687 /*
2688 * someone else is playing with the
2689 * page. We will have to wait.
2690 */
2691 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2692 continue;
2693 }
2694 /* Someone else already cleaning the page? */
2695 if((dst_page->cleaning || dst_page->absent ||
2696 dst_page->wire_count != 0) &&
2697 !dst_page->list_req_pending) {
2698 if(user_page_list) {
2699 user_page_list[entry].phys_addr = 0;
2700 }
2701 entry++;
2702 dst_offset += PAGE_SIZE_64;
2703 xfer_size -= PAGE_SIZE;
2704 continue;
2705 }
2706 /* eliminate all mappings from the */
2707 /* original object and its progeny */
2708
2709 vm_page_lock_queues();
2710
2711 if (dst_page->pageout_queue == TRUE)
2712 /*
2713 * we've buddied up a page for a clustered pageout
2714 * that has already been moved to the pageout
2715 * queue by pageout_scan... we need to remove
2716 * it from the queue and drop the laundry count
2717 * on that queue
2718 */
2719 vm_pageout_queue_steal(dst_page);
2720 #if MACH_CLUSTER_STATS
2721 /* pageout statistics gathering. count */
2722 /* all the pages we will page out that */
2723 /* were not counted in the initial */
2724 /* vm_pageout_scan work */
2725 if(dst_page->list_req_pending)
2726 encountered_lrp = TRUE;
2727 if((dst_page->dirty ||
2728 (dst_page->object->internal &&
2729 dst_page->precious)) &&
2730 (dst_page->list_req_pending
2731 == FALSE)) {
2732 if(encountered_lrp) {
2733 CLUSTER_STAT
2734 (pages_at_higher_offsets++;)
2735 } else {
2736 CLUSTER_STAT
2737 (pages_at_lower_offsets++;)
2738 }
2739 }
2740 #endif
2741 /* Turn off busy indication on pending */
2742 /* pageout. Note: we can only get here */
2743 /* in the request pending case. */
2744 dst_page->list_req_pending = FALSE;
2745 dst_page->busy = FALSE;
2746 dst_page->cleaning = FALSE;
2747
2748 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2749 dirty = hw_dirty ? TRUE : dst_page->dirty;
2750
2751 if(cntrl_flags & UPL_SET_LITE) {
2752 int pg_num;
2753 pg_num = (dst_offset-offset)/PAGE_SIZE;
2754 lite_list[pg_num>>5] |=
2755 1 << (pg_num & 31);
2756 if (hw_dirty)
2757 pmap_clear_modify(dst_page->phys_page);
2758 /*
2759 * Record that this page has been
2760 * written out
2761 */
2762 #if MACH_PAGEMAP
2763 vm_external_state_set(
2764 object->existence_map,
2765 dst_page->offset);
2766 #endif /*MACH_PAGEMAP*/
2767
2768 /*
2769 * Mark original page as cleaning
2770 * in place.
2771 */
2772 dst_page->cleaning = TRUE;
2773 dst_page->dirty = TRUE;
2774 dst_page->precious = FALSE;
2775 } else {
2776 /* use pageclean setup, it is more */
2777 /* convenient even for the pageout */
2778 /* cases here */
2779
2780 vm_object_lock(upl->map_object);
2781 vm_pageclean_setup(dst_page,
2782 alias_page, upl->map_object,
2783 size - xfer_size);
2784 vm_object_unlock(upl->map_object);
2785
2786 alias_page->absent = FALSE;
2787 alias_page = NULL;
2788 }
2789
2790 if(!dirty) {
2791 dst_page->dirty = FALSE;
2792 dst_page->precious = TRUE;
2793 }
2794
2795 if(dst_page->pageout)
2796 dst_page->busy = TRUE;
2797
2798 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2799 /*
2800 * ENCRYPTED SWAP:
2801 * We want to deny access to the target page
2802 * because its contents are about to be
2803 * encrypted and the user would be very
2804 * confused to see encrypted data instead
2805 * of their data.
2806 */
2807 dst_page->busy = TRUE;
2808 }
2809 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2810 /*
2811 * deny access to the target page
2812 * while it is being worked on
2813 */
2814 if ((!dst_page->pageout) &&
2815 (dst_page->wire_count == 0)) {
2816 dst_page->busy = TRUE;
2817 dst_page->pageout = TRUE;
2818 vm_page_wire(dst_page);
2819 }
2820 }
2821
2822 if(user_page_list) {
2823 user_page_list[entry].phys_addr
2824 = dst_page->phys_page;
2825 user_page_list[entry].dirty =
2826 dst_page->dirty;
2827 user_page_list[entry].pageout =
2828 dst_page->pageout;
2829 user_page_list[entry].absent =
2830 dst_page->absent;
2831 user_page_list[entry].precious =
2832 dst_page->precious;
2833 }
2834 vm_page_unlock_queues();
2835
2836 /*
2837 * ENCRYPTED SWAP:
2838 * The caller is gathering this page and might
2839 * access its contents later on. Decrypt the
2840 * page before adding it to the UPL, so that
2841 * the caller never sees encrypted data.
2842 */
2843 if (! (cntrl_flags & UPL_ENCRYPT) &&
2844 dst_page->encrypted) {
2845 assert(dst_page->busy);
2846
2847 vm_page_decrypt(dst_page, 0);
2848 vm_page_decrypt_for_upl_counter++;
2849
2850 /*
2851 * Retry this page, since anything
2852 * could have changed while we were
2853 * decrypting.
2854 */
2855 continue;
2856 }
2857 }
2858 entry++;
2859 dst_offset += PAGE_SIZE_64;
2860 xfer_size -= PAGE_SIZE;
2861 }
2862 } else {
2863 while (xfer_size) {
2864 if((alias_page == NULL) &&
2865 !(cntrl_flags & UPL_SET_LITE)) {
2866 vm_object_unlock(object);
2867 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2868 vm_object_lock(object);
2869 }
2870
2871 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2872 object->copy != last_copy_object) {
2873 /* Honor copy-on-write obligations */
2874
2875 /*
2876 * The copy object has changed since we
2877 * last synchronized for copy-on-write.
2878 * Another copy object might have been
2879 * inserted while we released the object's
2880 * lock. Since someone could have seen the
2881 * original contents of the remaining pages
2882 * through that new object, we have to
2883 * synchronize with it again for the remaining
2884 * pages only. The previous pages are "busy"
2885 * so they can not be seen through the new
2886 * mapping. The new mapping will see our
2887 * upcoming changes for those previous pages,
2888 * but that's OK since they couldn't see what
2889 * was there before. It's just a race anyway
2890 * and there's no guarantee of consistency or
2891 * atomicity. We just don't want new mappings
2892 * to see both the *before* and *after* pages.
2893 */
2894 if (object->copy != VM_OBJECT_NULL) {
2895 vm_object_update(
2896 object,
2897 dst_offset,/* current offset */
2898 xfer_size, /* remaining size */
2899 NULL,
2900 NULL,
2901 FALSE, /* should_return */
2902 MEMORY_OBJECT_COPY_SYNC,
2903 VM_PROT_NO_CHANGE);
2904 upl_cow_again++;
2905 upl_cow_again_pages +=
2906 xfer_size >> PAGE_SHIFT;
2907 }
2908 /* remember the copy object we synced with */
2909 last_copy_object = object->copy;
2910 }
2911
2912 dst_page = vm_page_lookup(object, dst_offset);
2913
2914 if(dst_page != VM_PAGE_NULL) {
2915 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2916 !((dst_page->list_req_pending)
2917 && (dst_page->absent))) {
2918 /* we are doing extended range */
2919 /* requests. we want to grab */
2920 /* pages around some which are */
2921 /* already present. */
2922 if(user_page_list) {
2923 user_page_list[entry].phys_addr = 0;
2924 }
2925 entry++;
2926 dst_offset += PAGE_SIZE_64;
2927 xfer_size -= PAGE_SIZE;
2928 continue;
2929 }
2930 if((dst_page->cleaning) &&
2931 !(dst_page->list_req_pending)) {
2932 /*someone else is writing to the */
2933 /* page. We will have to wait. */
2934 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
2935 continue;
2936 }
2937 if ((dst_page->fictitious &&
2938 dst_page->list_req_pending)) {
2939 /* dump the fictitious page */
2940 dst_page->list_req_pending = FALSE;
2941 dst_page->clustered = FALSE;
2942
2943 vm_page_lock_queues();
2944 vm_page_free(dst_page);
2945 vm_page_unlock_queues();
2946
2947 dst_page = NULL;
2948 } else if ((dst_page->absent &&
2949 dst_page->list_req_pending)) {
2950 /* the default_pager case */
2951 dst_page->list_req_pending = FALSE;
2952 dst_page->busy = FALSE;
2953 }
2954 }
2955 if(dst_page == VM_PAGE_NULL) {
2956 if(object->private) {
2957 /*
2958 * This is a nasty wrinkle for users
2959 * of upl who encounter device or
2960 * private memory; however, it is
2961 * unavoidable... only a fault can
2962 * resolve the actual backing
2963 * physical page by asking the
2964 * backing device.
2965 */
2966 if(user_page_list) {
2967 user_page_list[entry].phys_addr = 0;
2968 }
2969 entry++;
2970 dst_offset += PAGE_SIZE_64;
2971 xfer_size -= PAGE_SIZE;
2972 continue;
2973 }
2974 /* need to allocate a page */
2975 dst_page = vm_page_alloc(object, dst_offset);
2976 if (dst_page == VM_PAGE_NULL) {
2977 vm_object_unlock(object);
2978 VM_PAGE_WAIT();
2979 vm_object_lock(object);
2980 continue;
2981 }
2982 dst_page->busy = FALSE;
2983 #if 0
2984 if(cntrl_flags & UPL_NO_SYNC) {
2985 dst_page->page_lock = 0;
2986 dst_page->unlock_request = 0;
2987 }
2988 #endif
2989 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
2990 /*
2991 * if UPL_RET_ONLY_ABSENT was specified,
2992 * then we're definitely setting up a
2993 * upl for a clustered read/pagein
2994 * operation... mark the pages as clustered
2995 * so vm_fault can correctly attribute them
2996 * to the 'pagein' bucket the first time
2997 * a fault happens on them
2998 */
2999 dst_page->clustered = TRUE;
3000 }
3001 dst_page->absent = TRUE;
3002 object->absent_count++;
3003 }
3004 #if 1
3005 if(cntrl_flags & UPL_NO_SYNC) {
3006 dst_page->page_lock = 0;
3007 dst_page->unlock_request = 0;
3008 }
3009 #endif /* 1 */
3010
3011 /*
3012 * ENCRYPTED SWAP:
3013 */
3014 if (cntrl_flags & UPL_ENCRYPT) {
3015 /*
3016 * The page is going to be encrypted when we
3017 * get it from the pager, so mark it so.
3018 */
3019 dst_page->encrypted = TRUE;
3020 } else {
3021 /*
3022 * Otherwise, the page will not contain
3023 * encrypted data.
3024 */
3025 dst_page->encrypted = FALSE;
3026 }
3027
3028 dst_page->overwriting = TRUE;
3029 if(dst_page->fictitious) {
3030 panic("need corner case for fictitious page");
3031 }
3032 if(dst_page->page_lock) {
3033 do_m_lock = TRUE;
3034 }
3035 if(upl_ptr) {
3036
3037 /* eliminate all mappings from the */
3038 /* original object and its progeny */
3039
3040 if(dst_page->busy) {
3041 /*someone else is playing with the */
3042 /* page. We will have to wait. */
3043 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3044 continue;
3045 }
3046 vm_page_lock_queues();
3047
3048 if( !(cntrl_flags & UPL_FILE_IO))
3049 hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
3050 else
3051 hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
3052 dirty = hw_dirty ? TRUE : dst_page->dirty;
3053
3054 if(cntrl_flags & UPL_SET_LITE) {
3055 int pg_num;
3056 pg_num = (dst_offset-offset)/PAGE_SIZE;
3057 lite_list[pg_num>>5] |=
3058 1 << (pg_num & 31);
3059 if (hw_dirty)
3060 pmap_clear_modify(dst_page->phys_page);
3061 /*
3062 * Record that this page has been
3063 * written out
3064 */
3065 #if MACH_PAGEMAP
3066 vm_external_state_set(
3067 object->existence_map,
3068 dst_page->offset);
3069 #endif /*MACH_PAGEMAP*/
3070
3071 /*
3072 * Mark original page as cleaning
3073 * in place.
3074 */
3075 dst_page->cleaning = TRUE;
3076 dst_page->dirty = TRUE;
3077 dst_page->precious = FALSE;
3078 } else {
3079 /* use pageclean setup, it is more */
3080 /* convenient even for the pageout */
3081 /* cases here */
3082 vm_object_lock(upl->map_object);
3083 vm_pageclean_setup(dst_page,
3084 alias_page, upl->map_object,
3085 size - xfer_size);
3086 vm_object_unlock(upl->map_object);
3087
3088 alias_page->absent = FALSE;
3089 alias_page = NULL;
3090 }
3091
3092 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
3093 /* clean in place for read implies */
3094 /* that a write will be done on all */
3095 /* the pages that are dirty before */
3096 /* a upl commit is done. The caller */
3097 /* is obligated to preserve the */
3098 /* contents of all pages marked */
3099 /* dirty. */
3100 upl->flags |= UPL_CLEAR_DIRTY;
3101 }
3102
3103 if(!dirty) {
3104 dst_page->dirty = FALSE;
3105 dst_page->precious = TRUE;
3106 }
3107
3108 if (dst_page->wire_count == 0) {
3109 /* deny access to the target page while */
3110 /* it is being worked on */
3111 dst_page->busy = TRUE;
3112 } else {
3113 vm_page_wire(dst_page);
3114 }
3115 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
3116 /*
3117 * expect the page not to be used
3118 * since it's coming in as part
3119 * of a cluster and could be
3120 * speculative... pages that
3121 * are 'consumed' will get a
3122 * hardware reference
3123 */
3124 dst_page->reference = FALSE;
3125 } else {
3126 /*
3127 * expect the page to be used
3128 */
3129 dst_page->reference = TRUE;
3130 }
3131 dst_page->precious =
3132 (cntrl_flags & UPL_PRECIOUS)
3133 ? TRUE : FALSE;
3134 if(user_page_list) {
3135 user_page_list[entry].phys_addr
3136 = dst_page->phys_page;
3137 user_page_list[entry].dirty =
3138 dst_page->dirty;
3139 user_page_list[entry].pageout =
3140 dst_page->pageout;
3141 user_page_list[entry].absent =
3142 dst_page->absent;
3143 user_page_list[entry].precious =
3144 dst_page->precious;
3145 }
3146 vm_page_unlock_queues();
3147 }
3148 entry++;
3149 dst_offset += PAGE_SIZE_64;
3150 xfer_size -= PAGE_SIZE;
3151 }
3152 }
3153
3154 if (upl->flags & UPL_INTERNAL) {
3155 if(page_list_count != NULL)
3156 *page_list_count = 0;
3157 } else if (page_list_count != NULL &&
3158 *page_list_count > entry) {
3159 *page_list_count = entry;
3160 }
3161
3162 if(alias_page != NULL) {
3163 vm_page_lock_queues();
3164 vm_page_free(alias_page);
3165 vm_page_unlock_queues();
3166 }
3167
3168 if(do_m_lock) {
3169 vm_prot_t access_required;
3170 /* call back all associated pages from other users of the pager */
3171 /* all future updates will be on data which is based on the */
3172 /* changes we are going to make here. Note: it is assumed that */
3173 /* we already hold copies of the data so we will not be seeing */
3174 /* an avalanche of incoming data from the pager */
3175 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
3176 ? VM_PROT_READ : VM_PROT_WRITE;
3177 while (TRUE) {
3178 kern_return_t rc;
3179
3180 if(!object->pager_ready) {
3181 wait_result_t wait_result;
3182
3183 wait_result = vm_object_sleep(object,
3184 VM_OBJECT_EVENT_PAGER_READY,
3185 THREAD_UNINT);
3186 if (wait_result != THREAD_AWAKENED) {
3187 vm_object_unlock(object);
3188 return KERN_FAILURE;
3189 }
3190 continue;
3191 }
3192
3193 vm_object_unlock(object);
3194 rc = memory_object_data_unlock(
3195 object->pager,
3196 dst_offset + object->paging_offset,
3197 size,
3198 access_required);
3199 if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
3200 return KERN_FAILURE;
3201 vm_object_lock(object);
3202
3203 if (rc == KERN_SUCCESS)
3204 break;
3205 }
3206
3207 /* let's wait on the last page requested */
3208 /* NOTE: we will have to update lock completed routine to signal */
3209 if(dst_page != VM_PAGE_NULL &&
3210 (access_required & dst_page->page_lock) != access_required) {
3211 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
3212 vm_object_unlock(object);
3213 thread_block(THREAD_CONTINUE_NULL);
3214 return KERN_SUCCESS;
3215 }
3216 }
3217
3218 vm_object_unlock(object);
3219 return KERN_SUCCESS;
3220 }
3221
3222 /* JMM - Backward compatibility for now */
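/*
 * vm_fault_list_request:
 *
 * shim that maps the older page-list-pointer interface onto
 * memory_object_upl_request... for internal UPLs the caller's page
 * list pointer is redirected to the upl's inline array on return
 */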
3223 kern_return_t
3224 vm_fault_list_request( /* forward */
3225 memory_object_control_t control,
3226 vm_object_offset_t offset,
3227 upl_size_t size,
3228 upl_t *upl_ptr,
3229 upl_page_info_t **user_page_list_ptr,
3230 int page_list_count,
3231 int cntrl_flags);
3232 kern_return_t
3233 vm_fault_list_request(
3234 memory_object_control_t control,
3235 vm_object_offset_t offset,
3236 upl_size_t size,
3237 upl_t *upl_ptr,
3238 upl_page_info_t **user_page_list_ptr,
3239 int page_list_count,
3240 int cntrl_flags)
3241 {
3242 int local_list_count;
3243 upl_page_info_t *user_page_list;
3244 kern_return_t kr;
3245
3246 if (user_page_list_ptr != NULL) {
3247 local_list_count = page_list_count;
3248 user_page_list = *user_page_list_ptr;
3249 } else {
3250 local_list_count = 0;
3251 user_page_list = NULL;
3252 }
3253 kr = memory_object_upl_request(control,
3254 offset,
3255 size,
3256 upl_ptr,
3257 user_page_list,
3258 &local_list_count,
3259 cntrl_flags);
3260
3261 if(kr != KERN_SUCCESS)
3262 return kr;
3263
3264 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3265 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3266 }
3267
3268 return KERN_SUCCESS;
3269 }
3270
3271
3272
3273 /*
3274 * Routine: vm_object_super_upl_request
3275 * Purpose:
3276 * Cause the population of a portion of a vm_object
3277 * in much the same way as memory_object_upl_request.
3278 * Depending on the nature of the request, the pages
3279 * returned may contain valid data or be uninitialized.
3280 * However, the region may be expanded up to the super
3281 * cluster size provided.
3282 */
3283
3284 __private_extern__ kern_return_t
3285 vm_object_super_upl_request(
3286 vm_object_t object,
3287 vm_object_offset_t offset,
3288 upl_size_t size,
3289 upl_size_t super_cluster,
3290 upl_t *upl,
3291 upl_page_info_t *user_page_list,
3292 unsigned int *page_list_count,
3293 int cntrl_flags)
3294 {
3295 vm_page_t target_page;
3296 int ticket;
3297
3298
3299 if(object->paging_offset > offset)
3300 return KERN_FAILURE;
3301
3302 assert(object->paging_in_progress);
3303 offset = offset - object->paging_offset;
3304
3305 if(cntrl_flags & UPL_FOR_PAGEOUT) {
3306
3307 vm_object_lock(object);
3308
3309 if((target_page = vm_page_lookup(object, offset))
3310 != VM_PAGE_NULL) {
3311 ticket = target_page->page_ticket;
3312 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
3313 cntrl_flags = cntrl_flags |
3314 ((ticket << UPL_PAGE_TICKET_SHIFT)
3315 & UPL_PAGE_TICKET_MASK);
3316 }
3317 vm_object_unlock(object);
3318 }
3319
3320 if (super_cluster > size) {
3321
3322 vm_object_offset_t base_offset;
3323 upl_size_t super_size;
3324
3325 base_offset = (offset &
3326 ~((vm_object_offset_t) super_cluster - 1));
3327 super_size = (offset+size) > (base_offset + super_cluster) ?
3328 super_cluster<<1 : super_cluster;
3329 super_size = ((base_offset + super_size) > object->size) ?
3330 (object->size - base_offset) : super_size;
3331 if(offset > (base_offset + super_size))
3332 panic("vm_object_super_upl_request: Missed target pageout"
3333 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3334 offset, base_offset, super_size, super_cluster,
3335 size, object->paging_offset);
3336 /*
3337 * apparently there is a case where the vm requests a
3338 * page to be written out whose offset is beyond the
3339 * object size
3340 */
3341 if((offset + size) > (base_offset + super_size))
3342 super_size = (offset + size) - base_offset;
3343
3344 offset = base_offset;
3345 size = super_size;
3346 }
3347 return vm_object_upl_request(object, offset, size,
3348 upl, user_page_list, page_list_count,
3349 cntrl_flags);
3350 }
3351
3352
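/*
 * vm_map_create_upl:
 *
 * build a UPL against the object backing a range of 'map'... look up
 * the map entry for 'offset', clip *upl_size to the entry and to
 * MAX_UPL_TRANSFER, satisfy copy-on-write and data-sync obligations,
 * and then hand off to vm_object_upl_request (or
 * vm_object_iopl_request when UPL_SET_IO_WIRE is set)
 */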
3353 kern_return_t
3354 vm_map_create_upl(
3355 vm_map_t map,
3356 vm_map_address_t offset,
3357 upl_size_t *upl_size,
3358 upl_t *upl,
3359 upl_page_info_array_t page_list,
3360 unsigned int *count,
3361 int *flags)
3362 {
3363 vm_map_entry_t entry;
3364 int caller_flags;
3365 int force_data_sync;
3366 int sync_cow_data;
3367 vm_object_t local_object;
3368 vm_map_offset_t local_offset;
3369 vm_map_offset_t local_start;
3370 kern_return_t ret;
3371
3372 caller_flags = *flags;
3373
3374 if (caller_flags & ~UPL_VALID_FLAGS) {
3375 /*
3376 * For forward compatibility's sake,
3377 * reject any unknown flag.
3378 */
3379 return KERN_INVALID_VALUE;
3380 }
3381
3382 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3383 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3384
3385 if(upl == NULL)
3386 return KERN_INVALID_ARGUMENT;
3387
3388
3389 REDISCOVER_ENTRY:
3390 vm_map_lock(map);
3391 if (vm_map_lookup_entry(map, offset, &entry)) {
3392 if (entry->object.vm_object == VM_OBJECT_NULL ||
3393 !entry->object.vm_object->phys_contiguous) {
3394 if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
3395 *upl_size = MAX_UPL_TRANSFER * page_size;
3396 }
3397 }
3398 if((entry->vme_end - offset) < *upl_size) {
3399 *upl_size = entry->vme_end - offset;
3400 }
3401 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3402 if (entry->object.vm_object == VM_OBJECT_NULL) {
3403 *flags = 0;
3404 } else if (entry->object.vm_object->private) {
3405 *flags = UPL_DEV_MEMORY;
3406 if (entry->object.vm_object->phys_contiguous) {
3407 *flags |= UPL_PHYS_CONTIG;
3408 }
3409 } else {
3410 *flags = 0;
3411 }
3412 vm_map_unlock(map);
3413 return KERN_SUCCESS;
3414 }
3415 /*
3416 * Create an object if necessary.
3417 */
3418 if (entry->object.vm_object == VM_OBJECT_NULL) {
3419 entry->object.vm_object = vm_object_allocate(
3420 (vm_size_t)(entry->vme_end - entry->vme_start));
3421 entry->offset = 0;
3422 }
3423 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3424 if (!(entry->protection & VM_PROT_WRITE)) {
3425 vm_map_unlock(map);
3426 return KERN_PROTECTION_FAILURE;
3427 }
3428 if (entry->needs_copy) {
3429 vm_map_t local_map;
3430 vm_object_t object;
3431 vm_map_offset_t offset_hi;
3432 vm_map_offset_t offset_lo;
3433 vm_object_offset_t new_offset;
3434 vm_prot_t prot;
3435 boolean_t wired;
3436 vm_behavior_t behavior;
3437 vm_map_version_t version;
3438 vm_map_t real_map;
3439
3440 local_map = map;
3441 vm_map_lock_write_to_read(map);
3442 if(vm_map_lookup_locked(&local_map,
3443 offset, VM_PROT_WRITE,
3444 &version, &object,
3445 &new_offset, &prot, &wired,
3446 &behavior, &offset_lo,
3447 &offset_hi, &real_map)) {
3448 vm_map_unlock(local_map);
3449 return KERN_FAILURE;
3450 }
3451 if (real_map != map) {
3452 vm_map_unlock(real_map);
3453 }
3454 vm_object_unlock(object);
3455 vm_map_unlock(local_map);
3456
3457 goto REDISCOVER_ENTRY;
3458 }
3459 }
3460 if (entry->is_sub_map) {
3461 vm_map_t submap;
3462
3463 submap = entry->object.sub_map;
3464 local_start = entry->vme_start;
3465 local_offset = entry->offset;
3466 vm_map_reference(submap);
3467 vm_map_unlock(map);
3468
3469 ret = (vm_map_create_upl(submap,
3470 local_offset + (offset - local_start),
3471 upl_size, upl, page_list, count,
3472 flags));
3473
3474 vm_map_deallocate(submap);
3475 return ret;
3476 }
3477
3478 if (sync_cow_data) {
3479 if (entry->object.vm_object->shadow
3480 || entry->object.vm_object->copy) {
3481
3482 local_object = entry->object.vm_object;
3483 local_start = entry->vme_start;
3484 local_offset = entry->offset;
3485 vm_object_reference(local_object);
3486 vm_map_unlock(map);
3487
3488 if (entry->object.vm_object->shadow &&
3489 entry->object.vm_object->copy) {
3490 vm_object_lock_request(
3491 local_object->shadow,
3492 (vm_object_offset_t)
3493 ((offset - local_start) +
3494 local_offset) +
3495 local_object->shadow_offset,
3496 *upl_size, FALSE,
3497 MEMORY_OBJECT_DATA_SYNC,
3498 VM_PROT_NO_CHANGE);
3499 }
3500 sync_cow_data = FALSE;
3501 vm_object_deallocate(local_object);
3502 goto REDISCOVER_ENTRY;
3503 }
3504 }
3505
3506 if (force_data_sync) {
3507
3508 local_object = entry->object.vm_object;
3509 local_start = entry->vme_start;
3510 local_offset = entry->offset;
3511 vm_object_reference(local_object);
3512 vm_map_unlock(map);
3513
3514 vm_object_lock_request(
3515 local_object,
3516 (vm_object_offset_t)
3517 ((offset - local_start) + local_offset),
3518 (vm_object_size_t)*upl_size, FALSE,
3519 MEMORY_OBJECT_DATA_SYNC,
3520 VM_PROT_NO_CHANGE);
3521 force_data_sync = FALSE;
3522 vm_object_deallocate(local_object);
3523 goto REDISCOVER_ENTRY;
3524 }
3525
3526 if(!(entry->object.vm_object->private)) {
3527 if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3528 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3529 if(entry->object.vm_object->phys_contiguous) {
3530 *flags = UPL_PHYS_CONTIG;
3531 } else {
3532 *flags = 0;
3533 }
3534 } else {
3535 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3536 }
3537 local_object = entry->object.vm_object;
3538 local_offset = entry->offset;
3539 local_start = entry->vme_start;
3540 vm_object_reference(local_object);
3541 vm_map_unlock(map);
3542 if(caller_flags & UPL_SET_IO_WIRE) {
3543 ret = (vm_object_iopl_request(local_object,
3544 (vm_object_offset_t)
3545 ((offset - local_start)
3546 + local_offset),
3547 *upl_size,
3548 upl,
3549 page_list,
3550 count,
3551 caller_flags));
3552 } else {
3553 ret = (vm_object_upl_request(local_object,
3554 (vm_object_offset_t)
3555 ((offset - local_start)
3556 + local_offset),
3557 *upl_size,
3558 upl,
3559 page_list,
3560 count,
3561 caller_flags));
3562 }
3563 vm_object_deallocate(local_object);
3564 return(ret);
3565 }
3566
3567 vm_map_unlock(map);
3568 return(KERN_FAILURE);
3569
3570 }
3571
3572 /*
3573 * Internal routine to enter a UPL into a VM map.
3574 *
3575 * JMM - This should just be doable through the standard
3576 * vm_map_enter() API.
3577 */
3578 kern_return_t
3579 vm_map_enter_upl(
3580 vm_map_t map,
3581 upl_t upl,
3582 vm_map_offset_t *dst_addr)
3583 {
3584 vm_map_size_t size;
3585 vm_object_offset_t offset;
3586 vm_map_offset_t addr;
3587 vm_page_t m;
3588 kern_return_t kr;
3589
3590 if (upl == UPL_NULL)
3591 return KERN_INVALID_ARGUMENT;
3592
3593 upl_lock(upl);
3594
3595 /* check to see if already mapped */
3596 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3597 upl_unlock(upl);
3598 return KERN_FAILURE;
3599 }
3600
3601 if((!(upl->map_object->pageout)) &&
3602 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3603 (upl->map_object->phys_contiguous))) {
3604 vm_object_t object;
3605 vm_page_t alias_page;
3606 vm_object_offset_t new_offset;
3607 int pg_num;
3608 wpl_array_t lite_list;
3609
3610 if(upl->flags & UPL_INTERNAL) {
3611 lite_list = (wpl_array_t)
3612 ((((uintptr_t)upl) + sizeof(struct upl))
3613 + ((upl->size/PAGE_SIZE)
3614 * sizeof(upl_page_info_t)));
3615 } else {
3616 lite_list = (wpl_array_t)
3617 (((uintptr_t)upl) + sizeof(struct upl));
3618 }
3619 object = upl->map_object;
3620 upl->map_object = vm_object_allocate(upl->size);
3621 vm_object_lock(upl->map_object);
3622 upl->map_object->shadow = object;
3623 upl->map_object->pageout = TRUE;
3624 upl->map_object->can_persist = FALSE;
3625 upl->map_object->copy_strategy =
3626 MEMORY_OBJECT_COPY_NONE;
3627 upl->map_object->shadow_offset =
3628 upl->offset - object->paging_offset;
3629 upl->map_object->wimg_bits = object->wimg_bits;
3630 offset = upl->map_object->shadow_offset;
3631 new_offset = 0;
3632 size = upl->size;
3633
3634 vm_object_lock(object);
3635
3636 while(size) {
3637 pg_num = (new_offset)/PAGE_SIZE;
3638 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3639 vm_object_unlock(object);
3640 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3641 vm_object_lock(object);
3642 m = vm_page_lookup(object, offset);
3643 if (m == VM_PAGE_NULL) {
3644 panic("vm_upl_map: page missing\n");
3645 }
3646
3647 vm_object_paging_begin(object);
3648
3649 /*
3650 * Convert the fictitious page to a private
3651 * shadow of the real page.
3652 */
3653 assert(alias_page->fictitious);
3654 alias_page->fictitious = FALSE;
3655 alias_page->private = TRUE;
3656 alias_page->pageout = TRUE;
3657 alias_page->phys_page = m->phys_page;
3658
3659 vm_page_lock_queues();
3660 vm_page_wire(alias_page);
3661 vm_page_unlock_queues();
3662
3663 /*
3664 * ENCRYPTED SWAP:
3665 * The virtual page ("m") has to be wired in some way
3666 * here or its physical page ("m->phys_page") could
3667 * be recycled at any time.
3668 * Assuming this is enforced by the caller, we can't
3669 * get an encrypted page here. Since the encryption
3670 * key depends on the VM page's "pager" object and
3671 * the "paging_offset", we couldn't handle 2 pageable
3672 * VM pages (with different pagers and paging_offsets)
3673 * sharing the same physical page: we could end up
3674 * encrypting with one key (via one VM page) and
3675 * decrypting with another key (via the alias VM page).
3676 */
3677 ASSERT_PAGE_DECRYPTED(m);
3678
3679 vm_page_insert(alias_page,
3680 upl->map_object, new_offset);
3681 assert(!alias_page->wanted);
3682 alias_page->busy = FALSE;
3683 alias_page->absent = FALSE;
3684 }
3685
3686 size -= PAGE_SIZE;
3687 offset += PAGE_SIZE_64;
3688 new_offset += PAGE_SIZE_64;
3689 }
3690 vm_object_unlock(object);
3691 vm_object_unlock(upl->map_object);
3692 }
3693 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3694 offset = upl->offset - upl->map_object->paging_offset;
3695 else
3696 offset = 0;
3697
3698 size = upl->size;
3699
3700 vm_object_lock(upl->map_object);
3701 upl->map_object->ref_count++;
3702 vm_object_res_reference(upl->map_object);
3703 vm_object_unlock(upl->map_object);
3704
3705 *dst_addr = 0;
3706
3707
3708 /* NEED A UPL_MAP ALIAS */
3709 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3710 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3711 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3712
3713 if (kr != KERN_SUCCESS) {
3714 upl_unlock(upl);
3715 return(kr);
3716 }
3717
3718 vm_object_lock(upl->map_object);
3719
3720 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3721 m = vm_page_lookup(upl->map_object, offset);
3722 if(m) {
3723 unsigned int cache_attr;
3724 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3725
3726 PMAP_ENTER(map->pmap, addr,
3727 m, VM_PROT_ALL,
3728 cache_attr, TRUE);
3729 }
3730 offset+=PAGE_SIZE_64;
3731 }
3732 vm_object_unlock(upl->map_object);
3733
3734 upl->ref_count++; /* hold a reference for the mapping */
3735 upl->flags |= UPL_PAGE_LIST_MAPPED;
3736 upl->kaddr = *dst_addr;
3737 upl_unlock(upl);
3738 return KERN_SUCCESS;
3739 }
3740
3741 /*
3742 * Internal routine to remove a UPL mapping from a VM map.
3743 *
3744 * XXX - This should just be doable through a standard
3745 * vm_map_remove() operation. Otherwise, implicit clean-up
3746 * of the target map won't be able to correctly remove
3747 * these (and release the reference on the UPL). Having
3748 * to do this means we can't map these into user-space
3749 * maps yet.
3750 */
3751 kern_return_t
3752 vm_map_remove_upl(
3753 vm_map_t map,
3754 upl_t upl)
3755 {
3756 vm_address_t addr;
3757 upl_size_t size;
3758
3759 if (upl == UPL_NULL)
3760 return KERN_INVALID_ARGUMENT;
3761
3762 upl_lock(upl);
3763 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
3764 addr = upl->kaddr;
3765 size = upl->size;
3766 assert(upl->ref_count > 1);
3767 upl->ref_count--; /* removing mapping ref */
3768 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3769 upl->kaddr = (vm_offset_t) 0;
3770 upl_unlock(upl);
3771
3772 vm_map_remove( map,
3773 vm_map_trunc_page(addr),
3774 vm_map_round_page(addr + size),
3775 VM_MAP_NO_FLAGS);
3776 return KERN_SUCCESS;
3777 }
3778 upl_unlock(upl);
3779 return KERN_FAILURE;
3780 }
3781
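/*
 * upl_commit_range:
 *
 * commit the pages covered by [offset, offset+size) in the upl back
 * to their object... clear the busy/cleaning state set up when the
 * upl was created, apply the UPL_COMMIT_* flags to the dirty and
 * reference state, and free or reactivate pages that were targets
 * of a pageout
 */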
3782 kern_return_t
3783 upl_commit_range(
3784 upl_t upl,
3785 upl_offset_t offset,
3786 upl_size_t size,
3787 int flags,
3788 upl_page_info_t *page_list,
3789 mach_msg_type_number_t count,
3790 boolean_t *empty)
3791 {
3792 upl_size_t xfer_size = size;
3793 vm_object_t shadow_object;
3794 vm_object_t object = upl->map_object;
3795 vm_object_offset_t target_offset;
3796 int entry;
3797 wpl_array_t lite_list;
3798 int occupied;
3799 int delayed_unlock = 0;
3800 int clear_refmod = 0;
3801 boolean_t shadow_internal;
3802
3803 *empty = FALSE;
3804
3805 if (upl == UPL_NULL)
3806 return KERN_INVALID_ARGUMENT;
3807
3808
3809 if (count == 0)
3810 page_list = NULL;
3811
3812 if (object->pageout) {
3813 shadow_object = object->shadow;
3814 } else {
3815 shadow_object = object;
3816 }
3817
3818 upl_lock(upl);
3819
3820 if (upl->flags & UPL_ACCESS_BLOCKED) {
3821 /*
3822 * We used this UPL to block access to the pages by marking
3823 * them "busy". Now we need to clear the "busy" bit to allow
3824 * access to these pages again.
3825 */
3826 flags |= UPL_COMMIT_ALLOW_ACCESS;
3827 }
3828
3829 if (upl->flags & UPL_CLEAR_DIRTY)
3830 flags |= UPL_COMMIT_CLEAR_DIRTY;
3831
3832 if (upl->flags & UPL_DEVICE_MEMORY) {
3833 xfer_size = 0;
3834 } else if ((offset + size) > upl->size) {
3835 upl_unlock(upl);
3836 return KERN_FAILURE;
3837 }
3838
3839 if (upl->flags & UPL_INTERNAL) {
3840 lite_list = (wpl_array_t)
3841 ((((uintptr_t)upl) + sizeof(struct upl))
3842 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3843 } else {
3844 lite_list = (wpl_array_t)
3845 (((uintptr_t)upl) + sizeof(struct upl));
3846 }
3847 if (object != shadow_object)
3848 vm_object_lock(object);
3849 vm_object_lock(shadow_object);
3850
3851 shadow_internal = shadow_object->internal;
3852
3853 entry = offset/PAGE_SIZE;
3854 target_offset = (vm_object_offset_t)offset;
3855
3856 while (xfer_size) {
3857 vm_page_t t,m;
3858 upl_page_info_t *p;
3859
3860 m = VM_PAGE_NULL;
3861
3862 if (upl->flags & UPL_LITE) {
3863 int pg_num;
3864
3865 pg_num = target_offset/PAGE_SIZE;
3866
3867 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3868 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3869 m = vm_page_lookup(shadow_object,
3870 target_offset + (upl->offset -
3871 shadow_object->paging_offset));
3872 }
3873 }
3874 if (object->pageout) {
3875 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3876 t->pageout = FALSE;
3877
3878 if (delayed_unlock) {
3879 delayed_unlock = 0;
3880 vm_page_unlock_queues();
3881 }
3882 VM_PAGE_FREE(t);
3883
3884 if (m == NULL) {
3885 m = vm_page_lookup(
3886 shadow_object,
3887 target_offset +
3888 object->shadow_offset);
3889 }
3890 if (m != VM_PAGE_NULL)
3891 vm_object_paging_end(m->object);
3892 }
3893 }
3894 if (m != VM_PAGE_NULL) {
3895
3896 clear_refmod = 0;
3897
3898 if (upl->flags & UPL_IO_WIRE) {
3899
3900 if (delayed_unlock == 0)
3901 vm_page_lock_queues();
3902
3903 vm_page_unwire(m);
3904
3905 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3906 delayed_unlock = 0;
3907 vm_page_unlock_queues();
3908 }
3909 if (page_list) {
3910 page_list[entry].phys_addr = 0;
3911 }
3912 if (flags & UPL_COMMIT_SET_DIRTY) {
3913 m->dirty = TRUE;
3914 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3915 m->dirty = FALSE;
3916 clear_refmod |= VM_MEM_MODIFIED;
3917 }
3918 if (flags & UPL_COMMIT_INACTIVATE) {
3919 m->reference = FALSE;
3920 clear_refmod |= VM_MEM_REFERENCED;
3921 vm_page_deactivate(m);
3922 }
3923 if (clear_refmod)
3924 pmap_clear_refmod(m->phys_page, clear_refmod);
3925
3926 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
3927 /*
3928 * We blocked access to the pages in this UPL.
3929 * Clear the "busy" bit and wake up any waiter
3930 * for this page.
3931 */
3932 PAGE_WAKEUP_DONE(m);
3933 }
3934
3935 target_offset += PAGE_SIZE_64;
3936 xfer_size -= PAGE_SIZE;
3937 entry++;
3938 continue;
3939 }
3940 if (delayed_unlock == 0)
3941 vm_page_lock_queues();
3942 /*
3943 * make sure to clear the hardware
3944 * modify or reference bits before
3945 * releasing the BUSY bit on this page
3946 * otherwise we risk losing a legitimate
3947 * change of state
3948 */
3949 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3950 m->dirty = FALSE;
3951 clear_refmod |= VM_MEM_MODIFIED;
3952 }
3953 if (flags & UPL_COMMIT_INACTIVATE)
3954 clear_refmod |= VM_MEM_REFERENCED;
3955
3956 if (clear_refmod)
3957 pmap_clear_refmod(m->phys_page, clear_refmod);
3958
3959 if (page_list) {
3960 p = &(page_list[entry]);
3961 if(p->phys_addr && p->pageout && !m->pageout) {
3962 m->busy = TRUE;
3963 m->pageout = TRUE;
3964 vm_page_wire(m);
3965 } else if (page_list[entry].phys_addr &&
3966 !p->pageout && m->pageout &&
3967 !m->dump_cleaning) {
3968 m->pageout = FALSE;
3969 m->absent = FALSE;
3970 m->overwriting = FALSE;
3971 vm_page_unwire(m);
3972 PAGE_WAKEUP_DONE(m);
3973 }
3974 page_list[entry].phys_addr = 0;
3975 }
3976 m->dump_cleaning = FALSE;
3977 if(m->laundry) {
3978 vm_pageout_throttle_up(m);
3979 }
3980 if(m->pageout) {
3981 m->cleaning = FALSE;
3982 m->pageout = FALSE;
3983 #if MACH_CLUSTER_STATS
3984 if (m->wanted) vm_pageout_target_collisions++;
3985 #endif
3986 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
3987 m->dirty = TRUE;
3988 else
3989 m->dirty = FALSE;
3990
3991 if(m->dirty) {
3992 vm_page_unwire(m);/* reactivates */
3993
3994 if (upl->flags & UPL_PAGEOUT) {
3995 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
3996 VM_STAT(reactivations++);
3997 }
3998 PAGE_WAKEUP_DONE(m);
3999 } else {
4000 vm_page_free(m);/* clears busy, etc. */
4001
4002 if (upl->flags & UPL_PAGEOUT) {
4003 CLUSTER_STAT(vm_pageout_target_page_freed++;)
4004
4005 if (page_list[entry].dirty)
4006 VM_STAT(pageouts++);
4007 }
4008 }
4009 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4010 delayed_unlock = 0;
4011 vm_page_unlock_queues();
4012 }
4013 target_offset += PAGE_SIZE_64;
4014 xfer_size -= PAGE_SIZE;
4015 entry++;
4016 continue;
4017 }
4018 #if MACH_CLUSTER_STATS
4019 m->dirty = pmap_is_modified(m->phys_page);
4020
4021 if (m->dirty) vm_pageout_cluster_dirtied++;
4022 else vm_pageout_cluster_cleaned++;
4023 if (m->wanted) vm_pageout_cluster_collisions++;
4024 #else
4025 m->dirty = 0;
4026 #endif
4027
4028 if((m->busy) && (m->cleaning)) {
4029 /* the request_page_list case */
4030 if(m->absent) {
4031 m->absent = FALSE;
4032 if(shadow_object->absent_count == 1)
4033 vm_object_absent_release(shadow_object);
4034 else
4035 shadow_object->absent_count--;
4036 }
4037 m->overwriting = FALSE;
4038 m->busy = FALSE;
4039 m->dirty = FALSE;
4040 } else if (m->overwriting) {
4041 /* alternate request page list, write to
4042 * page_list case. Occurs when the original
4043 * page was wired at the time of the list
4044 * request */
4045 assert(m->wire_count != 0);
4046 vm_page_unwire(m);/* reactivates */
4047 m->overwriting = FALSE;
4048 }
4049 m->cleaning = FALSE;
4050
4051 /* It is part of the semantics of COPYOUT_FROM */
4052 /* UPLs that a commit implies a cache sync */
4053 /* between the vm page and the backing store; */
4054 /* this can be used to strip the precious bit */
4055 /* as well as clean */
4056 if (upl->flags & UPL_PAGE_SYNC_DONE)
4057 m->precious = FALSE;
4058
4059 if (flags & UPL_COMMIT_SET_DIRTY)
4060 m->dirty = TRUE;
4061
4062 if (flags & UPL_COMMIT_INACTIVATE) {
4063 m->reference = FALSE;
4064 vm_page_deactivate(m);
4065 } else if (!m->active && !m->inactive) {
4066 if (m->reference)
4067 vm_page_activate(m);
4068 else
4069 vm_page_deactivate(m);
4070 }
4071
4072 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4073 /*
4074 * We blocked access to the pages in this UPL.
4075 * Clear the "busy" bit on this page before we
4076 * wake up any waiter.
4077 */
4078 m->busy = FALSE;
4079 }
4080
4081 /*
4082 * Wake up any thread waiting for the page's "cleaning" state to be cleared.
4083 */
4084 PAGE_WAKEUP(m);
4085
4086 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4087 delayed_unlock = 0;
4088 vm_page_unlock_queues();
4089 }
4090 }
4091 target_offset += PAGE_SIZE_64;
4092 xfer_size -= PAGE_SIZE;
4093 entry++;
4094 }
4095 if (delayed_unlock)
4096 vm_page_unlock_queues();
4097
4098 occupied = 1;
4099
4100 if (upl->flags & UPL_DEVICE_MEMORY) {
4101 occupied = 0;
4102 } else if (upl->flags & UPL_LITE) {
4103 int pg_num;
4104 int i;
4105 pg_num = upl->size/PAGE_SIZE;
4106 pg_num = (pg_num + 31) >> 5;
4107 occupied = 0;
4108 for(i= 0; i<pg_num; i++) {
4109 if(lite_list[i] != 0) {
4110 occupied = 1;
4111 break;
4112 }
4113 }
4114 } else {
4115 if(queue_empty(&upl->map_object->memq)) {
4116 occupied = 0;
4117 }
4118 }
4119
4120 if(occupied == 0) {
4121 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4122 *empty = TRUE;
4123 }
4124 if(object == shadow_object)
4125 vm_object_paging_end(shadow_object);
4126 }
4127 vm_object_unlock(shadow_object);
4128 if (object != shadow_object)
4129 vm_object_unlock(object);
4130 upl_unlock(upl);
4131
4132 return KERN_SUCCESS;
4133 }
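/*
 * The "delayed_unlock" counter used above batches work under the page-queues
 * lock: the lock is taken lazily on the first page that needs it and only
 * dropped once DELAYED_UNLOCK_LIMIT pages have been processed, instead of
 * being cycled for every page.  A minimal sketch of the pattern (illustrative
 * only, not part of the build; the helper name is hypothetical):
 */
#if 0
static void
touch_pages_batched(vm_page_t *pages, int count)
{
	int	delayed_unlock = 0;
	int	i;

	for (i = 0; i < count; i++) {
		if (delayed_unlock == 0)
			vm_page_lock_queues();

		/* ... work on pages[i] that needs the page queues lock ... */

		if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
			delayed_unlock = 0;
			vm_page_unlock_queues();
		}
	}
	if (delayed_unlock)
		vm_page_unlock_queues();
}
#endif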
4134
4135 kern_return_t
4136 upl_abort_range(
4137 upl_t upl,
4138 upl_offset_t offset,
4139 upl_size_t size,
4140 int error,
4141 boolean_t *empty)
4142 {
4143 upl_size_t xfer_size = size;
4144 vm_object_t shadow_object;
4145 vm_object_t object = upl->map_object;
4146 vm_object_offset_t target_offset;
4147 int entry;
4148 wpl_array_t lite_list;
4149 int occupied;
4150 boolean_t shadow_internal;
4151
4152 *empty = FALSE;
4153
4154 if (upl == UPL_NULL)
4155 return KERN_INVALID_ARGUMENT;
4156
4157 if (upl->flags & UPL_IO_WIRE) {
4158 return upl_commit_range(upl,
4159 offset, size, 0,
4160 NULL, 0, empty);
4161 }
4162
4163 if(object->pageout) {
4164 shadow_object = object->shadow;
4165 } else {
4166 shadow_object = object;
4167 }
4168
4169 upl_lock(upl);
4170 if(upl->flags & UPL_DEVICE_MEMORY) {
4171 xfer_size = 0;
4172 } else if ((offset + size) > upl->size) {
4173 upl_unlock(upl);
4174 return KERN_FAILURE;
4175 }
4176 if (object != shadow_object)
4177 vm_object_lock(object);
4178 vm_object_lock(shadow_object);
4179
4180 shadow_internal = shadow_object->internal;
4181
4182 if(upl->flags & UPL_INTERNAL) {
4183 lite_list = (wpl_array_t)
4184 ((((uintptr_t)upl) + sizeof(struct upl))
4185 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4186 } else {
4187 lite_list = (wpl_array_t)
4188 (((uintptr_t)upl) + sizeof(struct upl));
4189 }
4190
4191 entry = offset/PAGE_SIZE;
4192 target_offset = (vm_object_offset_t)offset;
4193 while(xfer_size) {
4194 vm_page_t t,m;
4195
4196 m = VM_PAGE_NULL;
4197 if(upl->flags & UPL_LITE) {
4198 int pg_num;
4199 pg_num = target_offset/PAGE_SIZE;
4200 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4201 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4202 m = vm_page_lookup(shadow_object,
4203 target_offset + (upl->offset -
4204 shadow_object->paging_offset));
4205 }
4206 }
4207 if(object->pageout) {
4208 if ((t = vm_page_lookup(object, target_offset))
4209 != NULL) {
4210 t->pageout = FALSE;
4211 VM_PAGE_FREE(t);
4212 if(m == NULL) {
4213 m = vm_page_lookup(
4214 shadow_object,
4215 target_offset +
4216 object->shadow_offset);
4217 }
4218 if(m != VM_PAGE_NULL)
4219 vm_object_paging_end(m->object);
4220 }
4221 }
4222 if(m != VM_PAGE_NULL) {
4223 vm_page_lock_queues();
4224 if(m->absent) {
4225 boolean_t must_free = TRUE;
4226
4227 /* COPYOUT = FALSE case */
4228 /* check for error conditions which must */
4229 /* be passed back to the page's customer */
4230 if(error & UPL_ABORT_RESTART) {
4231 m->restart = TRUE;
4232 m->absent = FALSE;
4233 vm_object_absent_release(m->object);
4234 m->page_error = KERN_MEMORY_ERROR;
4235 m->error = TRUE;
4236 must_free = FALSE;
4237 } else if(error & UPL_ABORT_UNAVAILABLE) {
4238 m->restart = FALSE;
4239 m->unusual = TRUE;
4240 must_free = FALSE;
4241 } else if(error & UPL_ABORT_ERROR) {
4242 m->restart = FALSE;
4243 m->absent = FALSE;
4244 vm_object_absent_release(m->object);
4245 m->page_error = KERN_MEMORY_ERROR;
4246 m->error = TRUE;
4247 must_free = FALSE;
4248 }
4249
4250 /*
4251 * ENCRYPTED SWAP:
4252 * If the page was already encrypted,
4253 * we don't really need to decrypt it
4254 * now. It will get decrypted later,
4255 * on demand, as soon as someone needs
4256 * to access its contents.
4257 */
4258
4259 m->cleaning = FALSE;
4260 m->overwriting = FALSE;
4261 PAGE_WAKEUP_DONE(m);
4262
4263 if (must_free == TRUE) {
4264 vm_page_free(m);
4265 } else {
4266 vm_page_activate(m);
4267 }
4268 vm_page_unlock_queues();
4269
4270 target_offset += PAGE_SIZE_64;
4271 xfer_size -= PAGE_SIZE;
4272 entry++;
4273 continue;
4274 }
4275 /*
4276 * Handle the trusted pager throttle.
4277 */
4278 if (m->laundry) {
4279 vm_pageout_throttle_up(m);
4280 }
4281 if(m->pageout) {
4282 assert(m->busy);
4283 assert(m->wire_count == 1);
4284 m->pageout = FALSE;
4285 vm_page_unwire(m);
4286 }
4287 m->dump_cleaning = FALSE;
4288 m->cleaning = FALSE;
4289 m->overwriting = FALSE;
4290 #if MACH_PAGEMAP
4291 vm_external_state_clr(
4292 m->object->existence_map, m->offset);
4293 #endif /* MACH_PAGEMAP */
4294 if(error & UPL_ABORT_DUMP_PAGES) {
4295 vm_page_free(m);
4296 pmap_disconnect(m->phys_page);
4297 } else {
4298 PAGE_WAKEUP_DONE(m);
4299 }
4300 vm_page_unlock_queues();
4301 }
4302 target_offset += PAGE_SIZE_64;
4303 xfer_size -= PAGE_SIZE;
4304 entry++;
4305 }
4306 occupied = 1;
4307 if (upl->flags & UPL_DEVICE_MEMORY) {
4308 occupied = 0;
4309 } else if (upl->flags & UPL_LITE) {
4310 int pg_num;
4311 int i;
4312 pg_num = upl->size/PAGE_SIZE;
4313 pg_num = (pg_num + 31) >> 5;
4314 occupied = 0;
4315 for(i= 0; i<pg_num; i++) {
4316 if(lite_list[i] != 0) {
4317 occupied = 1;
4318 break;
4319 }
4320 }
4321 } else {
4322 if(queue_empty(&upl->map_object->memq)) {
4323 occupied = 0;
4324 }
4325 }
4326
4327 if(occupied == 0) {
4328 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4329 *empty = TRUE;
4330 }
4331 if(object == shadow_object)
4332 vm_object_paging_end(shadow_object);
4333 }
4334 vm_object_unlock(shadow_object);
4335 if (object != shadow_object)
4336 vm_object_unlock(object);
4337
4338 upl_unlock(upl);
4339
4340 return KERN_SUCCESS;
4341 }
4342
4343 kern_return_t
4344 upl_abort(
4345 upl_t upl,
4346 int error)
4347 {
4348 vm_object_t object = NULL;
4349 vm_object_t shadow_object = NULL;
4350 vm_object_offset_t offset;
4351 vm_object_offset_t shadow_offset;
4352 vm_object_offset_t target_offset;
4353 upl_size_t i;
4354 wpl_array_t lite_list;
4355 vm_page_t t,m;
4356 int occupied;
4357 boolean_t shadow_internal;
4358
4359 if (upl == UPL_NULL)
4360 return KERN_INVALID_ARGUMENT;
4361
4362 if (upl->flags & UPL_IO_WIRE) {
4363 boolean_t empty;
4364 return upl_commit_range(upl,
4365 0, upl->size, 0,
4366 NULL, 0, &empty);
4367 }
4368
4369 upl_lock(upl);
4370 if(upl->flags & UPL_DEVICE_MEMORY) {
4371 upl_unlock(upl);
4372 return KERN_SUCCESS;
4373 }
4374
4375 object = upl->map_object;
4376
4377 if (object == NULL) {
4378 panic("upl_abort: upl object is not backed by an object");
4379 upl_unlock(upl);
4380 return KERN_INVALID_ARGUMENT;
4381 }
4382
4383 if(object->pageout) {
4384 shadow_object = object->shadow;
4385 shadow_offset = object->shadow_offset;
4386 } else {
4387 shadow_object = object;
4388 shadow_offset = upl->offset - object->paging_offset;
4389 }
4390
4391 if(upl->flags & UPL_INTERNAL) {
4392 lite_list = (wpl_array_t)
4393 ((((uintptr_t)upl) + sizeof(struct upl))
4394 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4395 } else {
4396 lite_list = (wpl_array_t)
4397 (((uintptr_t)upl) + sizeof(struct upl));
4398 }
4399 offset = 0;
4400
4401 if (object != shadow_object)
4402 vm_object_lock(object);
4403 vm_object_lock(shadow_object);
4404
4405 shadow_internal = shadow_object->internal;
4406
4407 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
4408 m = VM_PAGE_NULL;
4409 target_offset = offset + shadow_offset;
4410 if(upl->flags & UPL_LITE) {
4411 int pg_num;
4412 pg_num = offset/PAGE_SIZE;
4413 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4414 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4415 m = vm_page_lookup(
4416 shadow_object, target_offset);
4417 }
4418 }
4419 if(object->pageout) {
4420 if ((t = vm_page_lookup(object, offset)) != NULL) {
4421 t->pageout = FALSE;
4422 VM_PAGE_FREE(t);
4423 if(m == NULL) {
4424 m = vm_page_lookup(
4425 shadow_object, target_offset);
4426 }
4427 if(m != VM_PAGE_NULL)
4428 vm_object_paging_end(m->object);
4429 }
4430 }
4431 if(m != VM_PAGE_NULL) {
4432 vm_page_lock_queues();
4433 if(m->absent) {
4434 boolean_t must_free = TRUE;
4435
4436 /* COPYOUT = FALSE case */
4437 /* check for error conditions which must */
4438 /* be passed back to the page's customer */
4439 if(error & UPL_ABORT_RESTART) {
4440 m->restart = TRUE;
4441 m->absent = FALSE;
4442 vm_object_absent_release(m->object);
4443 m->page_error = KERN_MEMORY_ERROR;
4444 m->error = TRUE;
4445 must_free = FALSE;
4446 } else if(error & UPL_ABORT_UNAVAILABLE) {
4447 m->restart = FALSE;
4448 m->unusual = TRUE;
4449 must_free = FALSE;
4450 } else if(error & UPL_ABORT_ERROR) {
4451 m->restart = FALSE;
4452 m->absent = FALSE;
4453 vm_object_absent_release(m->object);
4454 m->page_error = KERN_MEMORY_ERROR;
4455 m->error = TRUE;
4456 must_free = FALSE;
4457 }
4458
4459 /*
4460 * ENCRYPTED SWAP:
4461 * If the page was already encrypted,
4462 * we don't really need to decrypt it
4463 * now. It will get decrypted later,
4464 * on demand, as soon as someone needs
4465 * to access its contents.
4466 */
4467
4468 m->cleaning = FALSE;
4469 m->overwriting = FALSE;
4470 PAGE_WAKEUP_DONE(m);
4471
4472 if (must_free == TRUE) {
4473 vm_page_free(m);
4474 } else {
4475 vm_page_activate(m);
4476 }
4477 vm_page_unlock_queues();
4478 continue;
4479 }
4480 /*
4481 * Handle the trusted pager throttle.
4482 */
4483 if (m->laundry) {
4484 vm_pageout_throttle_up(m);
4485 }
4486 if(m->pageout) {
4487 assert(m->busy);
4488 assert(m->wire_count == 1);
4489 m->pageout = FALSE;
4490 vm_page_unwire(m);
4491 }
4492 m->dump_cleaning = FALSE;
4493 m->cleaning = FALSE;
4494 m->overwriting = FALSE;
4495 #if MACH_PAGEMAP
4496 vm_external_state_clr(
4497 m->object->existence_map, m->offset);
4498 #endif /* MACH_PAGEMAP */
4499 if(error & UPL_ABORT_DUMP_PAGES) {
4500 vm_page_free(m);
4501 pmap_disconnect(m->phys_page);
4502 } else {
4503 PAGE_WAKEUP_DONE(m);
4504 }
4505 vm_page_unlock_queues();
4506 }
4507 }
4508 occupied = 1;
4509 if (upl->flags & UPL_DEVICE_MEMORY) {
4510 occupied = 0;
4511 } else if (upl->flags & UPL_LITE) {
4512 int pg_num;
4513 int j;
4514 pg_num = upl->size/PAGE_SIZE;
4515 pg_num = (pg_num + 31) >> 5;
4516 occupied = 0;
4517 for(j= 0; j<pg_num; j++) {
4518 if(lite_list[j] != 0) {
4519 occupied = 1;
4520 break;
4521 }
4522 }
4523 } else {
4524 if(queue_empty(&upl->map_object->memq)) {
4525 occupied = 0;
4526 }
4527 }
4528
4529 if(occupied == 0) {
4530 if(object == shadow_object)
4531 vm_object_paging_end(shadow_object);
4532 }
4533 vm_object_unlock(shadow_object);
4534 if (object != shadow_object)
4535 vm_object_unlock(object);
4536
4537 upl_unlock(upl);
4538 return KERN_SUCCESS;
4539 }
4540
4541 /* an option on commit should be wire */
4542 kern_return_t
4543 upl_commit(
4544 upl_t upl,
4545 upl_page_info_t *page_list,
4546 mach_msg_type_number_t count)
4547 {
4548 if (upl == UPL_NULL)
4549 return KERN_INVALID_ARGUMENT;
4550
4551 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
4552 boolean_t empty;
4553 return upl_commit_range(upl, 0, upl->size, 0,
4554 page_list, count, &empty);
4555 }
4556
4557 if (count == 0)
4558 page_list = NULL;
4559
4560 upl_lock(upl);
4561 if (upl->flags & UPL_DEVICE_MEMORY)
4562 page_list = NULL;
4563
4564 if (upl->flags & UPL_ENCRYPTED) {
4565 /*
4566 * ENCRYPTED SWAP:
4567 * This UPL was encrypted, but we don't need
4568 * to decrypt here. We'll decrypt each page
4569 * later, on demand, as soon as someone needs
4570 * to access the page's contents.
4571 */
4572 }
4573
4574 if ((upl->flags & UPL_CLEAR_DIRTY) ||
4575 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
4576 vm_object_t shadow_object = upl->map_object->shadow;
4577 vm_object_t object = upl->map_object;
4578 vm_object_offset_t target_offset;
4579 upl_size_t xfer_end;
4580 int entry;
4581
4582 vm_page_t t, m;
4583 upl_page_info_t *p;
4584
4585 if (object != shadow_object)
4586 vm_object_lock(object);
4587 vm_object_lock(shadow_object);
4588
4589 entry = 0;
4590 target_offset = object->shadow_offset;
4591 xfer_end = upl->size + object->shadow_offset;
4592
4593 while(target_offset < xfer_end) {
4594
4595 if ((t = vm_page_lookup(object,
4596 target_offset - object->shadow_offset))
4597 == NULL) {
4598 target_offset += PAGE_SIZE_64;
4599 entry++;
4600 continue;
4601 }
4602
4603 m = vm_page_lookup(shadow_object, target_offset);
4604 if(m != VM_PAGE_NULL) {
4605 /*
4606 * ENCRYPTED SWAP:
4607 * If this page was encrypted, we
4608 * don't need to decrypt it here.
4609 * We'll decrypt it later, on demand,
4610 * as soon as someone needs to access
4611 * its contents.
4612 */
4613
4614 if (upl->flags & UPL_CLEAR_DIRTY) {
4615 pmap_clear_modify(m->phys_page);
4616 m->dirty = FALSE;
4617 }
4618 /* It is part of the semantics of */
4619 /* COPYOUT_FROM UPLs that a commit */
4620 /* implies a cache sync between the */
4621 /* vm page and the backing store; */
4622 /* this can be used to strip the */
4623 /* precious bit as well as clean */
4624 if (upl->flags & UPL_PAGE_SYNC_DONE)
4625 m->precious = FALSE;
4626
4627 if(page_list) {
4628 p = &(page_list[entry]);
4629 if(page_list[entry].phys_addr &&
4630 p->pageout && !m->pageout) {
4631 vm_page_lock_queues();
4632 m->busy = TRUE;
4633 m->pageout = TRUE;
4634 vm_page_wire(m);
4635 vm_page_unlock_queues();
4636 } else if (page_list[entry].phys_addr &&
4637 !p->pageout && m->pageout &&
4638 !m->dump_cleaning) {
4639 vm_page_lock_queues();
4640 m->pageout = FALSE;
4641 m->absent = FALSE;
4642 m->overwriting = FALSE;
4643 vm_page_unwire(m);
4644 PAGE_WAKEUP_DONE(m);
4645 vm_page_unlock_queues();
4646 }
4647 page_list[entry].phys_addr = 0;
4648 }
4649 }
4650 target_offset += PAGE_SIZE_64;
4651 entry++;
4652 }
4653 vm_object_unlock(shadow_object);
4654 if (object != shadow_object)
4655 vm_object_unlock(object);
4656
4657 }
4658 if (upl->flags & UPL_DEVICE_MEMORY) {
4659 vm_object_lock(upl->map_object->shadow);
4660 if(upl->map_object == upl->map_object->shadow)
4661 vm_object_paging_end(upl->map_object->shadow);
4662 vm_object_unlock(upl->map_object->shadow);
4663 }
4664 upl_unlock(upl);
4665 return KERN_SUCCESS;
4666 }
4667
4668
4669
4670 kern_return_t
4671 vm_object_iopl_request(
4672 vm_object_t object,
4673 vm_object_offset_t offset,
4674 upl_size_t size,
4675 upl_t *upl_ptr,
4676 upl_page_info_array_t user_page_list,
4677 unsigned int *page_list_count,
4678 int cntrl_flags)
4679 {
4680 vm_page_t dst_page;
4681 vm_object_offset_t dst_offset = offset;
4682 upl_size_t xfer_size = size;
4683 upl_t upl = NULL;
4684 unsigned int entry;
4685 wpl_array_t lite_list = NULL;
4686 int page_field_size;
4687 int delayed_unlock = 0;
4688 int no_zero_fill = FALSE;
4689 vm_page_t alias_page = NULL;
4690 kern_return_t ret;
4691 vm_prot_t prot;
4692
4693
4694 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4695 /*
4696 * For forward compatibility's sake,
4697 * reject any unknown flag.
4698 */
4699 return KERN_INVALID_VALUE;
4700 }
4701
4702 if (cntrl_flags & UPL_ENCRYPT) {
4703 /*
4704 * ENCRYPTED SWAP:
4705 * The paging path doesn't use this interface,
4706 * so we don't support the UPL_ENCRYPT flag
4707 * here. We won't encrypt the pages.
4708 */
4709 assert(! (cntrl_flags & UPL_ENCRYPT));
4710 }
4711
4712 if (cntrl_flags & UPL_NOZEROFILL)
4713 no_zero_fill = TRUE;
4714
4715 if (cntrl_flags & UPL_COPYOUT_FROM)
4716 prot = VM_PROT_READ;
4717 else
4718 prot = VM_PROT_READ | VM_PROT_WRITE;
4719
4720 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4721 size = MAX_UPL_TRANSFER * page_size;
4722 }
4723
4724 if(cntrl_flags & UPL_SET_INTERNAL)
4725 if(page_list_count != NULL)
4726 *page_list_count = MAX_UPL_TRANSFER;
4727 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4728 ((page_list_count != NULL) && (*page_list_count != 0)
4729 && *page_list_count < (size/page_size)))
4730 return KERN_INVALID_ARGUMENT;
4731
4732 if((!object->internal) && (object->paging_offset != 0))
4733 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
4734
4735 if(object->phys_contiguous) {
4736 /* No paging operations are possible against this memory */
4737 /* and so no need for map object, ever */
4738 cntrl_flags |= UPL_SET_LITE;
4739 }
4740
4741 if(upl_ptr) {
4742 if(cntrl_flags & UPL_SET_INTERNAL) {
4743 if(cntrl_flags & UPL_SET_LITE) {
4744 upl = upl_create(
4745 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4746 size);
4747 user_page_list = (upl_page_info_t *)
4748 (((uintptr_t)upl) + sizeof(struct upl));
4749 lite_list = (wpl_array_t)
4750 (((uintptr_t)user_page_list) +
4751 ((size/PAGE_SIZE) *
4752 sizeof(upl_page_info_t)));
4753 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4754 page_field_size =
4755 (page_field_size + 3) & 0xFFFFFFFC;
4756 bzero((char *)lite_list, page_field_size);
4757 upl->flags =
4758 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4759 } else {
4760 upl = upl_create(UPL_CREATE_INTERNAL, size);
4761 user_page_list = (upl_page_info_t *)
4762 (((uintptr_t)upl)
4763 + sizeof(struct upl));
4764 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4765 }
4766 } else {
4767 if(cntrl_flags & UPL_SET_LITE) {
4768 upl = upl_create(UPL_CREATE_LITE, size);
4769 lite_list = (wpl_array_t)
4770 (((uintptr_t)upl) + sizeof(struct upl));
4771 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4772 page_field_size =
4773 (page_field_size + 3) & 0xFFFFFFFC;
4774 bzero((char *)lite_list, page_field_size);
4775 upl->flags = UPL_LITE | UPL_IO_WIRE;
4776 } else {
4777 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4778 upl->flags = UPL_IO_WIRE;
4779 }
4780 }
4781
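		/*
		 * Sizing note (illustrative): page_field_size above is the size
		 * of the "lite" bitmap in bytes -- one bit per page, rounded up
		 * to whole bytes and then to a 4-byte boundary.  For example, a
		 * 13-page request needs 13 bits -> (13 + 7) >> 3 = 2 bytes ->
		 * (2 + 3) & 0xFFFFFFFC = 4 bytes handed to bzero().  Equivalent
		 * helper (not part of the build, hypothetical name):
		 */
#if 0
		unsigned int
		lite_bitmap_bytes(unsigned int npages)
		{
			unsigned int bytes = (npages + 7) >> 3;	/* bits -> bytes */
			return (bytes + 3) & 0xFFFFFFFC;	/* 4-byte multiple */
		}
#endif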
4782 if(object->phys_contiguous) {
4783 upl->map_object = object;
4784 /* don't need any shadow mappings for this one */
4785 /* since it is already I/O memory */
4786 upl->flags |= UPL_DEVICE_MEMORY;
4787
4788 vm_object_lock(object);
4789 vm_object_paging_begin(object);
4790 vm_object_unlock(object);
4791
4792 /* paging in progress also protects the paging_offset */
4793 upl->offset = offset + object->paging_offset;
4794 upl->size = size;
4795 *upl_ptr = upl;
4796 if(user_page_list) {
4797 user_page_list[0].phys_addr =
4798 (offset + object->shadow_offset)>>PAGE_SHIFT;
4799 user_page_list[0].device = TRUE;
4800 }
4801
4802 if(page_list_count != NULL) {
4803 if (upl->flags & UPL_INTERNAL) {
4804 *page_list_count = 0;
4805 } else {
4806 *page_list_count = 1;
4807 }
4808 }
4809 return KERN_SUCCESS;
4810 }
4811 if(user_page_list)
4812 user_page_list[0].device = FALSE;
4813
4814 if(cntrl_flags & UPL_SET_LITE) {
4815 upl->map_object = object;
4816 } else {
4817 upl->map_object = vm_object_allocate(size);
4818 vm_object_lock(upl->map_object);
4819 upl->map_object->shadow = object;
4820 upl->map_object->pageout = TRUE;
4821 upl->map_object->can_persist = FALSE;
4822 upl->map_object->copy_strategy =
4823 MEMORY_OBJECT_COPY_NONE;
4824 upl->map_object->shadow_offset = offset;
4825 upl->map_object->wimg_bits = object->wimg_bits;
4826 vm_object_unlock(upl->map_object);
4827 }
4828 }
4829 vm_object_lock(object);
4830 vm_object_paging_begin(object);
4831
4832 if (!object->phys_contiguous) {
4833 /* Protect user space from future COW operations */
4834 object->true_share = TRUE;
4835 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4836 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4837 }
4838
4839 /* we can lock the upl offset now that paging_in_progress is set */
4840 if(upl_ptr) {
4841 upl->size = size;
4842 upl->offset = offset + object->paging_offset;
4843 *upl_ptr = upl;
4844 #ifdef UPL_DEBUG
4845 queue_enter(&object->uplq, upl, upl_t, uplq);
4846 #endif /* UPL_DEBUG */
4847 }
4848
4849 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4850 /*
4851 * The user requested that access to the pages in this UPL
4852 * be blocked until the UPL is committed or aborted.
4853 */
4854 upl->flags |= UPL_ACCESS_BLOCKED;
4855 }
4856
4857 entry = 0;
4858 while (xfer_size) {
4859 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4860 if (delayed_unlock) {
4861 delayed_unlock = 0;
4862 vm_page_unlock_queues();
4863 }
4864 vm_object_unlock(object);
4865 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4866 vm_object_lock(object);
4867 }
4868 dst_page = vm_page_lookup(object, dst_offset);
4869
4870 /*
4871 * ENCRYPTED SWAP:
4872 * If the page is encrypted, we need to decrypt it,
4873 * so force a soft page fault.
4874 */
4875 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4876 (dst_page->encrypted) ||
4877 (dst_page->unusual && (dst_page->error ||
4878 dst_page->restart ||
4879 dst_page->absent ||
4880 dst_page->fictitious ||
4881 (prot & dst_page->page_lock)))) {
4882 vm_fault_return_t result;
4883 do {
4884 vm_page_t top_page;
4885 kern_return_t error_code;
4886 int interruptible;
4887
4888 vm_object_offset_t lo_offset = offset;
4889 vm_object_offset_t hi_offset = offset + size;
4890
4891
4892 if (delayed_unlock) {
4893 delayed_unlock = 0;
4894 vm_page_unlock_queues();
4895 }
4896
4897 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4898 interruptible = THREAD_ABORTSAFE;
4899 } else {
4900 interruptible = THREAD_UNINT;
4901 }
4902
4903 result = vm_fault_page(object, dst_offset,
4904 prot | VM_PROT_WRITE, FALSE,
4905 interruptible,
4906 lo_offset, hi_offset,
4907 VM_BEHAVIOR_SEQUENTIAL,
4908 &prot, &dst_page, &top_page,
4909 (int *)0,
4910 &error_code, no_zero_fill, FALSE, NULL, 0);
4911
4912 switch(result) {
4913 case VM_FAULT_SUCCESS:
4914
4915 PAGE_WAKEUP_DONE(dst_page);
4916
4917 /*
4918 * Release paging references and
4919 * top-level placeholder page, if any.
4920 */
4921
4922 if(top_page != VM_PAGE_NULL) {
4923 vm_object_t local_object;
4924 local_object =
4925 top_page->object;
4926 if(top_page->object
4927 != dst_page->object) {
4928 vm_object_lock(
4929 local_object);
4930 VM_PAGE_FREE(top_page);
4931 vm_object_paging_end(
4932 local_object);
4933 vm_object_unlock(
4934 local_object);
4935 } else {
4936 VM_PAGE_FREE(top_page);
4937 vm_object_paging_end(
4938 local_object);
4939 }
4940 }
4941
4942 break;
4943
4944
4945 case VM_FAULT_RETRY:
4946 vm_object_lock(object);
4947 vm_object_paging_begin(object);
4948 break;
4949
4950 case VM_FAULT_FICTITIOUS_SHORTAGE:
4951 vm_page_more_fictitious();
4952 vm_object_lock(object);
4953 vm_object_paging_begin(object);
4954 break;
4955
4956 case VM_FAULT_MEMORY_SHORTAGE:
4957 if (vm_page_wait(interruptible)) {
4958 vm_object_lock(object);
4959 vm_object_paging_begin(object);
4960 break;
4961 }
4962 /* fall thru */
4963
4964 case VM_FAULT_INTERRUPTED:
4965 error_code = MACH_SEND_INTERRUPTED;
4966 case VM_FAULT_MEMORY_ERROR:
4967 ret = (error_code ? error_code:
4968 KERN_MEMORY_ERROR);
4969 vm_object_lock(object);
4970 for(; offset < dst_offset;
4971 offset += PAGE_SIZE) {
4972 dst_page = vm_page_lookup(
4973 object, offset);
4974 if(dst_page == VM_PAGE_NULL)
4975 panic("vm_object_iopl_request: Wired pages missing. \n");
4976 vm_page_lock_queues();
4977 vm_page_unwire(dst_page);
4978 vm_page_unlock_queues();
4979 VM_STAT(reactivations++);
4980 }
4981 vm_object_unlock(object);
4982 upl_destroy(upl);
4983 return ret;
4984 }
4985 } while ((result != VM_FAULT_SUCCESS)
4986 || (result == VM_FAULT_INTERRUPTED));
4987 }
4988 if (delayed_unlock == 0)
4989 vm_page_lock_queues();
4990 vm_page_wire(dst_page);
4991
4992 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4993 /*
4994 * Mark the page "busy" to block any future page fault
4995 * on this page. We'll also remove the mapping
4996 * of all these pages before leaving this routine.
4997 */
4998 assert(!dst_page->fictitious);
4999 dst_page->busy = TRUE;
5000 }
5001
5002 if (upl_ptr) {
5003 if (cntrl_flags & UPL_SET_LITE) {
5004 int pg_num;
5005 pg_num = (dst_offset-offset)/PAGE_SIZE;
5006 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5007 } else {
5008 /*
5009 * Convert the fictitious page to a
5010 * private shadow of the real page.
5011 */
5012 assert(alias_page->fictitious);
5013 alias_page->fictitious = FALSE;
5014 alias_page->private = TRUE;
5015 alias_page->pageout = TRUE;
5016 alias_page->phys_page = dst_page->phys_page;
5017 vm_page_wire(alias_page);
5018
5019 vm_page_insert(alias_page,
5020 upl->map_object, size - xfer_size);
5021 assert(!alias_page->wanted);
5022 alias_page->busy = FALSE;
5023 alias_page->absent = FALSE;
5024 }
5025
5026 /* expect the page to be used */
5027 dst_page->reference = TRUE;
5028
5029 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5030 dst_page->dirty = TRUE;
5031 alias_page = NULL;
5032
5033 if (user_page_list) {
5034 user_page_list[entry].phys_addr
5035 = dst_page->phys_page;
5036 user_page_list[entry].dirty =
5037 dst_page->dirty;
5038 user_page_list[entry].pageout =
5039 dst_page->pageout;
5040 user_page_list[entry].absent =
5041 dst_page->absent;
5042 user_page_list[entry].precious =
5043 dst_page->precious;
5044 }
5045 }
5046 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
5047 delayed_unlock = 0;
5048 vm_page_unlock_queues();
5049 }
5050 entry++;
5051 dst_offset += PAGE_SIZE_64;
5052 xfer_size -= PAGE_SIZE;
5053 }
5054 if (delayed_unlock)
5055 vm_page_unlock_queues();
5056
5057 if (upl->flags & UPL_INTERNAL) {
5058 if(page_list_count != NULL)
5059 *page_list_count = 0;
5060 } else if ((page_list_count != NULL) &&
5061 (*page_list_count > entry)) {
5062 *page_list_count = entry;
5063 }
5064
5065 if (alias_page != NULL) {
5066 vm_page_lock_queues();
5067 vm_page_free(alias_page);
5068 vm_page_unlock_queues();
5069 }
5070
5071 vm_object_unlock(object);
5072
5073 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5074 /*
5075 * We've marked all the pages "busy" so that future
5076 * page faults will block.
5077 * Now remove the mapping for these pages, so that they
5078 * can't be accessed without causing a page fault.
5079 */
5080 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5081 PMAP_NULL, 0, VM_PROT_NONE);
5082 }
5083
5084 return KERN_SUCCESS;
5085 }
5086
5087 kern_return_t
5088 upl_transpose(
5089 upl_t upl1,
5090 upl_t upl2)
5091 {
5092 kern_return_t retval;
5093 boolean_t upls_locked;
5094 vm_object_t object1, object2;
5095
5096 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5097 return KERN_INVALID_ARGUMENT;
5098 }
5099
5100 upls_locked = FALSE;
5101
5102 /*
5103 * Since we need to lock both UPLs at the same time,
5104 * avoid deadlocks by always taking locks in the same order.
5105 */
5106 if (upl1 < upl2) {
5107 upl_lock(upl1);
5108 upl_lock(upl2);
5109 } else {
5110 upl_lock(upl2);
5111 upl_lock(upl1);
5112 }
5113 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5114
5115 object1 = upl1->map_object;
5116 object2 = upl2->map_object;
5117
5118 if (upl1->offset != 0 || upl2->offset != 0 ||
5119 upl1->size != upl2->size) {
5120 /*
5121 * We deal only with full objects, not subsets.
5122 * That's because we exchange the entire backing store info
5123 * for the objects: pager, resident pages, etc... We can't do
5124 * only part of it.
5125 */
5126 retval = KERN_INVALID_VALUE;
5127 goto done;
5128 }
5129
5130 /*
5131 * Transpose the VM objects' backing store.
5132 */
5133 retval = vm_object_transpose(object1, object2,
5134 (vm_object_size_t) upl1->size);
5135
5136 if (retval == KERN_SUCCESS) {
5137 /*
5138 * Make each UPL point to the correct VM object, i.e. the
5139 * object holding the pages that the UPL refers to...
5140 */
5141 upl1->map_object = object2;
5142 upl2->map_object = object1;
5143 }
5144
5145 done:
5146 /*
5147 * Cleanup.
5148 */
5149 if (upls_locked) {
5150 upl_unlock(upl1);
5151 upl_unlock(upl2);
5152 upls_locked = FALSE;
5153 }
5154
5155 return retval;
5156 }
5157
5158 /*
5159 * ENCRYPTED SWAP:
5160 *
5161 * Rationale: the user might have some encrypted data on disk (via
5162 * FileVault or any other mechanism). That data is then decrypted in
5163 * memory, which is safe as long as the machine is secure. But that
5164 * decrypted data in memory could be paged out to disk by the default
5165 * pager. The data would then be stored on disk in the clear (not encrypted)
5166 * and it could be accessed by anyone who gets physical access to the
5167 * disk (if the laptop or the disk gets stolen for example). This weakens
5168 * the security offered by FileVault.
5169 *
5170 * Solution: the default pager will optionally request that all the
5171 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5172 * before it sends this UPL to disk via the vnode_pageout() path.
5173 *
5174 * Notes:
5175 *
5176 * To avoid disrupting the VM LRU algorithms, we want to keep the
5177 * clean-in-place mechanisms, which allow us to send some extra pages to
5178 * swap (clustering) without actually removing them from the user's
5179 * address space. We don't want the user to unknowingly access encrypted
5180 * data, so we have to actually remove the encrypted pages from the page
5181 * table. When the user accesses the data, the hardware will fail to
5182 * locate the virtual page in its page table and will trigger a page
5183 * fault. We can then decrypt the page and enter it in the page table
5184 * again. Whenever we allow the user to access the contents of a page,
5185 * we have to make sure it's not encrypted.
5186 *
5187 *
5188 */
5189 /*
5190 * ENCRYPTED SWAP:
5191 * Reserve of virtual addresses in the kernel address space.
5192 * We need to map the physical pages in the kernel, so that we
5193 * can call the encryption/decryption routines with a kernel
5194 * virtual address. We keep this pool of pre-allocated kernel
5195 * virtual addresses so that we don't have to scan the kernel's
5196 * virtual address space each time we need to encrypt or decrypt
5197 * a physical page.
5198 * It would be nice to be able to encrypt and decrypt in physical
5199 * mode but that might not always be more efficient...
5200 */
5201 decl_simple_lock_data(,vm_paging_lock)
5202 #define VM_PAGING_NUM_PAGES 64
5203 vm_map_offset_t vm_paging_base_address = 0;
5204 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5205 int vm_paging_max_index = 0;
5206 unsigned long vm_paging_no_kernel_page = 0;
5207 unsigned long vm_paging_objects_mapped = 0;
5208 unsigned long vm_paging_pages_mapped = 0;
5209 unsigned long vm_paging_objects_mapped_slow = 0;
5210 unsigned long vm_paging_pages_mapped_slow = 0;
5211
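/*
 * Illustrative sketch (not part of the build): the pool above is one
 * contiguous window of VM_PAGING_NUM_PAGES pages of kernel virtual space
 * starting at vm_paging_base_address, with one "in use" boolean per slot.
 * Slot index and kernel virtual address therefore convert with simple page
 * arithmetic, which is what the fast paths in vm_paging_map_object() and
 * vm_paging_unmap_object() below rely on (helper names are hypothetical):
 */
#if 0
static vm_map_offset_t
vm_paging_slot_to_addr(int i)
{
	return vm_paging_base_address + ((vm_map_offset_t)i * PAGE_SIZE);
}

static int
vm_paging_addr_to_slot(vm_map_offset_t addr)
{
	return (int)((addr - vm_paging_base_address) >> PAGE_SHIFT);
}
#endif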
5212 /*
5213 * ENCRYPTED SWAP:
5214 * vm_paging_map_object:
5215 * Maps part of a VM object's pages in the kernel
5216 * virtual address space, using the pre-allocated
5217 * kernel virtual addresses, if possible.
5218 * Context:
5219 * The VM object is locked. This lock will get
5220 * dropped and re-acquired though.
5221 */
5222 kern_return_t
5223 vm_paging_map_object(
5224 vm_map_offset_t *address,
5225 vm_page_t page,
5226 vm_object_t object,
5227 vm_object_offset_t offset,
5228 vm_map_size_t *size)
5229 {
5230 kern_return_t kr;
5231 vm_map_offset_t page_map_offset;
5232 vm_map_size_t map_size;
5233 vm_object_offset_t object_offset;
5234 #ifdef __ppc__
5235 int i;
5236 vm_map_entry_t map_entry;
5237 #endif /* __ppc__ */
5238
5239
5240 #ifdef __ppc__
5241 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5242 /*
5243 * Optimization for the PowerPC.
5244 * Use one of the pre-allocated kernel virtual addresses
5245 * and just enter the VM page in the kernel address space
5246 * at that virtual address.
5247 */
5248 vm_object_unlock(object);
5249 simple_lock(&vm_paging_lock);
5250
5251 if (vm_paging_base_address == 0) {
5252 /*
5253 * Initialize our pool of pre-allocated kernel
5254 * virtual addresses.
5255 */
5256 simple_unlock(&vm_paging_lock);
5257 page_map_offset = 0;
5258 kr = vm_map_find_space(kernel_map,
5259 &page_map_offset,
5260 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5261 0,
5262 &map_entry);
5263 if (kr != KERN_SUCCESS) {
5264 panic("vm_paging_map_object: "
5265 "kernel_map full\n");
5266 }
5267 map_entry->object.vm_object = kernel_object;
5268 map_entry->offset =
5269 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5270 vm_object_reference(kernel_object);
5271 vm_map_unlock(kernel_map);
5272
5273 simple_lock(&vm_paging_lock);
5274 if (vm_paging_base_address != 0) {
5275 /* someone raced us and won: undo */
5276 simple_unlock(&vm_paging_lock);
5277 kr = vm_map_remove(kernel_map,
5278 page_map_offset,
5279 page_map_offset +
5280 (VM_PAGING_NUM_PAGES
5281 * PAGE_SIZE),
5282 VM_MAP_NO_FLAGS);
5283 assert(kr == KERN_SUCCESS);
5284 simple_lock(&vm_paging_lock);
5285 } else {
5286 vm_paging_base_address = page_map_offset;
5287 }
5288 }
5289
5290 /*
5291 * Try and find an available kernel virtual address
5292 * from our pre-allocated pool.
5293 */
5294 page_map_offset = 0;
5295 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5296 if (vm_paging_page_inuse[i] == FALSE) {
5297 page_map_offset = vm_paging_base_address +
5298 (i * PAGE_SIZE);
5299 break;
5300 }
5301 }
5302
5303 if (page_map_offset != 0) {
5304 /*
5305 * We found a kernel virtual address;
5306 * map the physical page to that virtual address.
5307 */
5308 if (i > vm_paging_max_index) {
5309 vm_paging_max_index = i;
5310 }
5311 vm_paging_page_inuse[i] = TRUE;
5312 simple_unlock(&vm_paging_lock);
5313 pmap_map_block(kernel_pmap,
5314 page_map_offset,
5315 page->phys_page,
5316 1, /* Size is number of 4k pages */
5317 VM_PROT_DEFAULT,
5318 ((int) page->object->wimg_bits &
5319 VM_WIMG_MASK),
5320 0);
5321 vm_paging_objects_mapped++;
5322 vm_paging_pages_mapped++;
5323 *address = page_map_offset;
5324 vm_object_lock(object);
5325
5326 /* all done and mapped, ready to use ! */
5327 return KERN_SUCCESS;
5328 }
5329
5330 /*
5331 * We ran out of pre-allocated kernel virtual
5332 * addresses. Just map the page in the kernel
5333 * the slow and regular way.
5334 */
5335 vm_paging_no_kernel_page++;
5336 simple_unlock(&vm_paging_lock);
5337 vm_object_lock(object);
5338 }
5339 #endif /* __ppc__ */
5340
5341 object_offset = vm_object_trunc_page(offset);
5342 map_size = vm_map_round_page(*size);
5343
5344 /*
5345 * Try and map the required range of the object
5346 * in the kernel_map
5347 */
5348
5349 /* don't go beyond the object's end... */
5350 if (object_offset >= object->size) {
5351 map_size = 0;
5352 } else if (map_size > object->size - offset) {
5353 map_size = object->size - offset;
5354 }
5355
5356 vm_object_reference_locked(object); /* for the map entry */
5357 vm_object_unlock(object);
5358
5359 kr = vm_map_enter(kernel_map,
5360 address,
5361 map_size,
5362 0,
5363 VM_FLAGS_ANYWHERE,
5364 object,
5365 object_offset,
5366 FALSE,
5367 VM_PROT_DEFAULT,
5368 VM_PROT_ALL,
5369 VM_INHERIT_NONE);
5370 if (kr != KERN_SUCCESS) {
5371 *address = 0;
5372 *size = 0;
5373 vm_object_deallocate(object); /* for the map entry */
5374 return kr;
5375 }
5376
5377 *size = map_size;
5378
5379 /*
5380 * Enter the mapped pages in the page table now.
5381 */
5382 vm_object_lock(object);
5383 for (page_map_offset = 0;
5384 map_size != 0;
5385 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5386 unsigned int cache_attr;
5387
5388 page = vm_page_lookup(object, offset + page_map_offset);
5389 if (page == VM_PAGE_NULL) {
5390 panic("vm_paging_map_object: no page !?");
5391 }
5392 if (page->no_isync == TRUE) {
5393 pmap_sync_page_data_phys(page->phys_page);
5394 }
5395 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5396
5397 PMAP_ENTER(kernel_pmap,
5398 *address + page_map_offset,
5399 page,
5400 VM_PROT_DEFAULT,
5401 cache_attr,
5402 FALSE);
5403 }
5404
5405 vm_paging_objects_mapped_slow++;
5406 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5407
5408 return KERN_SUCCESS;
5409 }
5410
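/*
 * Usage sketch (illustrative, not part of the build): a caller typically maps
 * a single busy page, works on it through the returned kernel virtual
 * address, and then tears the mapping down again, the way vm_page_encrypt()
 * and vm_page_decrypt() below do.  Assumes "page" is busy and its object is
 * locked on entry:
 */
#if 0
	vm_map_offset_t	kva = 0;
	vm_map_size_t	map_size = PAGE_SIZE;
	kern_return_t	kr;

	kr = vm_paging_map_object(&kva, page, page->object,
				  page->offset, &map_size);
	if (kr == KERN_SUCCESS) {
		/* ... access the page through CAST_DOWN(vm_offset_t, kva) ... */
		vm_paging_unmap_object(page->object, kva, kva + map_size);
	}
#endif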
5411 /*
5412 * ENCRYPTED SWAP:
5413 * vm_paging_unmap_object:
5414 * Unmaps part of a VM object's pages from the kernel
5415 * virtual address space.
5416 * Context:
5417 * The VM object is locked. This lock will get
5418 * dropped and re-acquired though.
5419 */
5420 void
5421 vm_paging_unmap_object(
5422 vm_object_t object,
5423 vm_map_offset_t start,
5424 vm_map_offset_t end)
5425 {
5426 kern_return_t kr;
5427 #ifdef __ppc__
5428 int i;
5429 #endif /* __ppc__ */
5430
5431 if ((vm_paging_base_address == 0) ||
5432 (start < vm_paging_base_address) ||
5433 (end > (vm_paging_base_address
5434 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
5435 /*
5436 * We didn't use our pre-allocated pool of
5437 * kernel virtual addresses. Deallocate the
5438 * virtual memory.
5439 */
5440 if (object != VM_OBJECT_NULL) {
5441 vm_object_unlock(object);
5442 }
5443 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5444 if (object != VM_OBJECT_NULL) {
5445 vm_object_lock(object);
5446 }
5447 assert(kr == KERN_SUCCESS);
5448 } else {
5449 /*
5450 * We used a kernel virtual address from our
5451 * pre-allocated pool. Put it back in the pool
5452 * for next time.
5453 */
5454 #ifdef __ppc__
5455 assert(end - start == PAGE_SIZE);
5456 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5457
5458 /* undo the pmap mapping */
5459 mapping_remove(kernel_pmap, start);
5460
5461 simple_lock(&vm_paging_lock);
5462 vm_paging_page_inuse[i] = FALSE;
5463 simple_unlock(&vm_paging_lock);
5464 #endif /* __ppc__ */
5465 }
5466 }
5467
5468 /*
5469 * Encryption data.
5470 * "iv" is the "initial vector". Ideally, we want to
5471 * have a different one for each page we encrypt, so that
5472 * crackers can't find encryption patterns too easily.
5473 */
5474 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5475 boolean_t swap_crypt_ctx_initialized = FALSE;
5476 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5477 aes_ctx swap_crypt_ctx;
5478 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5479
5480 #if DEBUG
5481 boolean_t swap_crypt_ctx_tested = FALSE;
5482 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5483 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5484 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5485 #endif /* DEBUG */
5486
5487 extern u_long random(void);
5488
5489 /*
5490 * Initialize the encryption context: key and key size.
5491 */
5492 void swap_crypt_ctx_initialize(void); /* forward */
5493 void
5494 swap_crypt_ctx_initialize(void)
5495 {
5496 unsigned int i;
5497
5498 /*
5499 * No need for locking to protect swap_crypt_ctx_initialized
5500 * because the first use of encryption will come from the
5501 * pageout thread (we won't pagein before there's been a pageout)
5502 * and there's only one pageout thread.
5503 */
5504 if (swap_crypt_ctx_initialized == FALSE) {
5505 for (i = 0;
5506 i < (sizeof (swap_crypt_key) /
5507 sizeof (swap_crypt_key[0]));
5508 i++) {
5509 swap_crypt_key[i] = random();
5510 }
5511 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5512 SWAP_CRYPT_AES_KEY_SIZE,
5513 &swap_crypt_ctx.encrypt);
5514 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5515 SWAP_CRYPT_AES_KEY_SIZE,
5516 &swap_crypt_ctx.decrypt);
5517 swap_crypt_ctx_initialized = TRUE;
5518 }
5519
5520 #if DEBUG
5521 /*
5522 * Validate the encryption algorithms.
5523 */
5524 if (swap_crypt_ctx_tested == FALSE) {
5525 /* initialize */
5526 for (i = 0; i < 4096; i++) {
5527 swap_crypt_test_page_ref[i] = (char) i;
5528 }
5529 /* encrypt */
5530 aes_encrypt_cbc(swap_crypt_test_page_ref,
5531 swap_crypt_null_iv,
5532 PAGE_SIZE / AES_BLOCK_SIZE,
5533 swap_crypt_test_page_encrypt,
5534 &swap_crypt_ctx.encrypt);
5535 /* decrypt */
5536 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5537 swap_crypt_null_iv,
5538 PAGE_SIZE / AES_BLOCK_SIZE,
5539 swap_crypt_test_page_decrypt,
5540 &swap_crypt_ctx.decrypt);
5541 /* compare result with original */
5542 for (i = 0; i < 4096; i ++) {
5543 if (swap_crypt_test_page_decrypt[i] !=
5544 swap_crypt_test_page_ref[i]) {
5545 panic("encryption test failed");
5546 }
5547 }
5548
5549 /* encrypt again */
5550 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5551 swap_crypt_null_iv,
5552 PAGE_SIZE / AES_BLOCK_SIZE,
5553 swap_crypt_test_page_decrypt,
5554 &swap_crypt_ctx.encrypt);
5555 /* decrypt in place */
5556 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5557 swap_crypt_null_iv,
5558 PAGE_SIZE / AES_BLOCK_SIZE,
5559 swap_crypt_test_page_decrypt,
5560 &swap_crypt_ctx.decrypt);
5561 for (i = 0; i < 4096; i ++) {
5562 if (swap_crypt_test_page_decrypt[i] !=
5563 swap_crypt_test_page_ref[i]) {
5564 panic("in place encryption test failed");
5565 }
5566 }
5567
5568 swap_crypt_ctx_tested = TRUE;
5569 }
5570 #endif /* DEBUG */
5571 }
5572
5573 /*
5574 * ENCRYPTED SWAP:
5575 * vm_page_encrypt:
5576 * Encrypt the given page, for secure paging.
5577 * The page might already be mapped at kernel virtual
5578 * address "kernel_mapping_offset". Otherwise, we need
5579 * to map it.
5580 *
5581 * Context:
5582 * The page's object is locked, but this lock will be released
5583 * and re-acquired.
5584 * The page is busy and not accessible by users (not entered in any pmap).
5585 */
5586 void
5587 vm_page_encrypt(
5588 vm_page_t page,
5589 vm_map_offset_t kernel_mapping_offset)
5590 {
5591 int clear_refmod = 0;
5592 kern_return_t kr;
5593 boolean_t page_was_referenced;
5594 boolean_t page_was_modified;
5595 vm_map_size_t kernel_mapping_size;
5596 vm_offset_t kernel_vaddr;
5597 union {
5598 unsigned char aes_iv[AES_BLOCK_SIZE];
5599 struct {
5600 memory_object_t pager_object;
5601 vm_object_offset_t paging_offset;
5602 } vm;
5603 } encrypt_iv;
5604
5605 if (! vm_pages_encrypted) {
5606 vm_pages_encrypted = TRUE;
5607 }
5608
5609 assert(page->busy);
5610 assert(page->dirty || page->precious);
5611
5612 if (page->encrypted) {
5613 /*
5614 * Already encrypted: no need to do it again.
5615 */
5616 vm_page_encrypt_already_encrypted_counter++;
5617 return;
5618 }
5619 ASSERT_PAGE_DECRYPTED(page);
5620
5621 /*
5622 * Gather the "reference" and "modified" status of the page.
5623 * We'll restore these values after the encryption, so that
5624 * the encryption is transparent to the rest of the system
5625 * and doesn't impact the VM's LRU logic.
5626 */
5627 page_was_referenced =
5628 (page->reference || pmap_is_referenced(page->phys_page));
5629 page_was_modified =
5630 (page->dirty || pmap_is_modified(page->phys_page));
5631
5632 if (kernel_mapping_offset == 0) {
5633 /*
5634 * The page hasn't already been mapped in kernel space
5635 * by the caller. Map it now, so that we can access
5636 * its contents and encrypt them.
5637 */
5638 kernel_mapping_size = PAGE_SIZE;
5639 kr = vm_paging_map_object(&kernel_mapping_offset,
5640 page,
5641 page->object,
5642 page->offset,
5643 &kernel_mapping_size);
5644 if (kr != KERN_SUCCESS) {
5645 panic("vm_page_encrypt: "
5646 "could not map page in kernel: 0x%x\n",
5647 kr);
5648 }
5649 } else {
5650 kernel_mapping_size = 0;
5651 }
5652 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5653
5654 if (swap_crypt_ctx_initialized == FALSE) {
5655 swap_crypt_ctx_initialize();
5656 }
5657 assert(swap_crypt_ctx_initialized);
5658
5659 /*
5660 * Prepare an "initial vector" for the encryption.
5661 * We use the "pager" and the "paging_offset" for that
5662 * page to obfuscate the encrypted data a bit more and
5663 * prevent crackers from finding patterns that they could
5664 * use to break the key.
5665 */
5666 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5667 encrypt_iv.vm.pager_object = page->object->pager;
5668 encrypt_iv.vm.paging_offset =
5669 page->object->paging_offset + page->offset;
5670
5671 vm_object_unlock(page->object);
5672
5673 /* encrypt the "initial vector" */
5674 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5675 swap_crypt_null_iv,
5676 1,
5677 &encrypt_iv.aes_iv[0],
5678 &swap_crypt_ctx.encrypt);
5679
5680 /*
5681 * Encrypt the page.
5682 */
5683 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5684 &encrypt_iv.aes_iv[0],
5685 PAGE_SIZE / AES_BLOCK_SIZE,
5686 (unsigned char *) kernel_vaddr,
5687 &swap_crypt_ctx.encrypt);
5688
5689 vm_page_encrypt_counter++;
5690
5691 vm_object_lock(page->object);
5692
5693 /*
5694 * Unmap the page from the kernel's address space,
5695 * if we had to map it ourselves. Otherwise, let
5696 * the caller undo the mapping if needed.
5697 */
5698 if (kernel_mapping_size != 0) {
5699 vm_paging_unmap_object(page->object,
5700 kernel_mapping_offset,
5701 kernel_mapping_offset + kernel_mapping_size);
5702 }
5703
5704 /*
5705 * Restore the "reference" and "modified" bits.
5706 * This should clean up any impact the encryption had
5707 * on them.
5708 */
5709 if (! page_was_referenced) {
5710 clear_refmod |= VM_MEM_REFERENCED;
5711 page->reference = FALSE;
5712 }
5713 if (! page_was_modified) {
5714 clear_refmod |= VM_MEM_MODIFIED;
5715 page->dirty = FALSE;
5716 }
5717 if (clear_refmod)
5718 pmap_clear_refmod(page->phys_page, clear_refmod);
5719
5720 page->encrypted = TRUE;
5721 }
5722
5723 /*
5724 * ENCRYPTED SWAP:
5725 * vm_page_decrypt:
5726 * Decrypt the given page.
5727 * The page might already be mapped at kernel virtual
5728 * address "kernel_mapping_offset". Otherwise, we need
5729 * to map it.
5730 *
5731 * Context:
5732 * The page's VM object is locked but will be unlocked and relocked.
5733 * The page is busy and not accessible by users (not entered in any pmap).
5734 */
5735 void
5736 vm_page_decrypt(
5737 vm_page_t page,
5738 vm_map_offset_t kernel_mapping_offset)
5739 {
5740 int clear_refmod = 0;
5741 kern_return_t kr;
5742 vm_map_size_t kernel_mapping_size;
5743 vm_offset_t kernel_vaddr;
5744 boolean_t page_was_referenced;
5745 union {
5746 unsigned char aes_iv[AES_BLOCK_SIZE];
5747 struct {
5748 memory_object_t pager_object;
5749 vm_object_offset_t paging_offset;
5750 } vm;
5751 } decrypt_iv;
5752
5753 assert(page->busy);
5754 assert(page->encrypted);
5755
5756 /*
5757 * Gather the "reference" status of the page.
5758 * We'll restore its value after the decryption, so that
5759 * the decryption is transparent to the rest of the system
5760 * and doesn't impact the VM's LRU logic.
5761 */
5762 page_was_referenced =
5763 (page->reference || pmap_is_referenced(page->phys_page));
5764
5765 if (kernel_mapping_offset == 0) {
5766 /*
5767 * The page hasn't already been mapped in kernel space
5768 * by the caller. Map it now, so that we can access
5769 * its contents and decrypt them.
5770 */
5771 kernel_mapping_size = PAGE_SIZE;
5772 kr = vm_paging_map_object(&kernel_mapping_offset,
5773 page,
5774 page->object,
5775 page->offset,
5776 &kernel_mapping_size);
5777 if (kr != KERN_SUCCESS) {
5778 panic("vm_page_decrypt: "
5779 "could not map page in kernel: 0x%x\n");
5780 }
5781 } else {
5782 kernel_mapping_size = 0;
5783 }
5784 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5785
5786 assert(swap_crypt_ctx_initialized);
5787
5788 /*
5789 * Prepare an "initial vector" for the decryption.
5790 * It has to be the same as the "initial vector" we
5791 * used to encrypt that page.
5792 */
5793 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5794 decrypt_iv.vm.pager_object = page->object->pager;
5795 decrypt_iv.vm.paging_offset =
5796 page->object->paging_offset + page->offset;
5797
5798 vm_object_unlock(page->object);
5799
5800 /* encrypt the "initial vector" */
5801 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5802 swap_crypt_null_iv,
5803 1,
5804 &decrypt_iv.aes_iv[0],
5805 &swap_crypt_ctx.encrypt);
5806
5807 /*
5808 * Decrypt the page.
5809 */
5810 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5811 &decrypt_iv.aes_iv[0],
5812 PAGE_SIZE / AES_BLOCK_SIZE,
5813 (unsigned char *) kernel_vaddr,
5814 &swap_crypt_ctx.decrypt);
5815 vm_page_decrypt_counter++;
5816
5817 vm_object_lock(page->object);
5818
5819 /*
5820 * Unmap the page from the kernel's address space,
5821 * if we had to map it ourselves. Otherwise, let
5822 * the caller undo the mapping if needed.
5823 */
5824 if (kernel_mapping_size != 0) {
5825 vm_paging_unmap_object(page->object,
5826 kernel_vaddr,
5827 kernel_vaddr + PAGE_SIZE);
5828 }
5829
5830 /*
5831 * After decryption, the page is actually clean.
5832 * It was encrypted as part of paging, which "cleans"
5833 * the "dirty" pages.
5834 * No one could have accessed it after it was encrypted,
5835 * and the decryption itself doesn't count as an access.
5836 */
5837 page->dirty = FALSE;
5838 clear_refmod = VM_MEM_MODIFIED;
5839
5840 /* restore the "reference" bit */
5841 if (! page_was_referenced) {
5842 page->reference = FALSE;
5843 clear_refmod |= VM_MEM_REFERENCED;
5844 }
5845 pmap_clear_refmod(page->phys_page, clear_refmod);
5846
5847 page->encrypted = FALSE;
5848
5849 /*
5850 * We've just modified the page's contents via the data cache and part
5851 * of the new contents might still be in the cache and not yet in RAM.
5852 * Since the page is now available and might get gathered in a UPL to
5853 * be part of a DMA transfer from a driver that expects the memory to
5854 * be coherent at this point, we have to flush the data cache.
5855 */
5856 pmap_sync_page_data_phys(page->phys_page);
5857 /*
5858 * Since the page is not mapped yet, some code might assume that it
5859 * doesn't need to invalidate the instruction cache when writing to
5860 * that page. That code relies on "no_isync" being set, so that the
5861 * caches get synchronized when the page is first mapped. So we need
5862 * to set "no_isync" here too, despite the fact that we just
5863 * synchronized the caches above...
5864 */
5865 page->no_isync = TRUE;
5866 }
5867
5868 unsigned long upl_encrypt_upls = 0;
5869 unsigned long upl_encrypt_pages = 0;
5870
5871 /*
5872 * ENCRYPTED SWAP:
5873 *
5874 * upl_encrypt:
5875 * Encrypts all the pages in the UPL, within the specified range.
5876 *
5877 */
5878 void
5879 upl_encrypt(
5880 upl_t upl,
5881 upl_offset_t crypt_offset,
5882 upl_size_t crypt_size)
5883 {
5884 upl_size_t upl_size;
5885 upl_offset_t upl_offset;
5886 vm_object_t upl_object;
5887 vm_page_t page;
5888 vm_object_t shadow_object;
5889 vm_object_offset_t shadow_offset;
5890 vm_object_offset_t paging_offset;
5891 vm_object_offset_t base_offset;
5892
5893 upl_encrypt_upls++;
5894 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5895
5896 upl_lock(upl);
5897
5898 upl_object = upl->map_object;
5899 upl_offset = upl->offset;
5900 upl_size = upl->size;
5901
5902 upl_unlock(upl);
5903
5904 vm_object_lock(upl_object);
5905
5906 /*
5907 * Find the VM object that contains the actual pages.
5908 */
5909 if (upl_object->pageout) {
5910 shadow_object = upl_object->shadow;
5911 /*
5912 * The offset in the shadow object is actually also
5913 * accounted for in upl->offset. It possibly shouldn't be
5914 * this way, but for now don't account for it twice.
5915 */
5916 shadow_offset = 0;
5917 assert(upl_object->paging_offset == 0); /* XXX ? */
5918 vm_object_lock(shadow_object);
5919 } else {
5920 shadow_object = upl_object;
5921 shadow_offset = 0;
5922 }
5923
5924 paging_offset = shadow_object->paging_offset;
5925 vm_object_paging_begin(shadow_object);
5926
5927 if (shadow_object != upl_object) {
5928 vm_object_unlock(shadow_object);
5929 }
5930 vm_object_unlock(upl_object);
5931
5932 base_offset = shadow_offset;
5933 base_offset += upl_offset;
5934 base_offset += crypt_offset;
5935 base_offset -= paging_offset;
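	/*
	 * At this point "base_offset" is the offset, within the shadow (real)
	 * VM object, of the first page to encrypt: upl->offset is expressed in
	 * the pager's namespace (paging_offset was added when the UPL was
	 * created), so subtracting paging_offset converts it back, and
	 * crypt_offset selects where in the UPL to start.
	 */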
5936 /*
5937 * Unmap the pages, so that nobody can continue accessing them while
5938 * they're encrypted. After that point, all accesses to these pages
5939 * will cause a page fault and block while the page is being encrypted
5940 * (busy). After the encryption completes, any access will cause a
5941 * page fault and the page gets decrypted at that time.
5942 */
5943 assert(crypt_offset + crypt_size <= upl_size);
5944 vm_object_pmap_protect(shadow_object,
5945 base_offset,
5946 (vm_object_size_t)crypt_size,
5947 PMAP_NULL,
5948 0,
5949 VM_PROT_NONE);
5950
5951 /* XXX FBDP could the object have changed significantly here ? */
5952 vm_object_lock(shadow_object);
5953
5954 for (upl_offset = 0;
5955 upl_offset < crypt_size;
5956 upl_offset += PAGE_SIZE) {
5957 page = vm_page_lookup(shadow_object,
5958 base_offset + upl_offset);
5959 if (page == VM_PAGE_NULL) {
5960 panic("upl_encrypt: "
5961 "no page for (obj=%p,off=%lld+%d)!\n",
5962 shadow_object,
5963 base_offset,
5964 upl_offset);
5965 }
5966 vm_page_encrypt(page, 0);
5967 }
5968
5969 vm_object_paging_end(shadow_object);
5970 vm_object_unlock(shadow_object);
5971 }
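/*
 * Illustrative sketch (not part of the build): how a pageout path
 * might use upl_encrypt() to encrypt an entire UPL before its pages
 * reach the backing store.  "example_encrypt_whole_upl" and
 * "should_encrypt_swap" are hypothetical names, not existing kernel
 * symbols.
 */
#if 0
static void
example_encrypt_whole_upl(
	upl_t		upl,
	boolean_t	should_encrypt_swap)
{
	if (should_encrypt_swap) {
		/* encrypt every page covered by this UPL */
		upl_encrypt(upl, 0, upl->size);
	}
}
#endif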
5972
5973 vm_size_t
5974 upl_get_internal_pagelist_offset(void)
5975 {
5976 return sizeof(struct upl);
5977 }
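/*
 * Illustrative sketch (not part of the build): locating the
 * upl_page_info_t array that immediately follows the upl header in an
 * internal UPL.  "example_internal_page_list" is a hypothetical name;
 * the UPL_GET_INTERNAL_PAGE_LIST() macro appears to rely on the same
 * computation.
 */
#if 0
static upl_page_info_t *
example_internal_page_list(upl_t some_upl)
{
	return (upl_page_info_t *)((vm_offset_t)some_upl +
	    upl_get_internal_pagelist_offset());
}
#endif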
5978
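/*
 * Note: both routines below manipulate the UPL_CLEAR_DIRTY flag,
 * which is presumably consulted when the UPL is committed to decide
 * whether the pages' dirty state should be cleared at that point.
 */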
5979 void
5980 upl_set_dirty(
5981 upl_t upl)
5982 {
5983 upl->flags |= UPL_CLEAR_DIRTY;
5984 }
5985
5986 void
5987 upl_clear_dirty(
5988 upl_t upl)
5989 {
5990 upl->flags &= ~UPL_CLEAR_DIRTY;
5991 }
5992
5993
5994 #ifdef MACH_BSD
5995
5996 boolean_t upl_page_present(upl_page_info_t *upl, int index)
5997 {
5998 return(UPL_PAGE_PRESENT(upl, index));
5999 }
6000 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
6001 {
6002 return(UPL_DIRTY_PAGE(upl, index));
6003 }
6004 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
6005 {
6006 return(UPL_VALID_PAGE(upl, index));
6007 }
6008 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6009 {
6010 return(UPL_PHYS_PAGE(upl, index));
6011 }
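/*
 * Illustrative sketch (not part of the build): a BSD-side caller
 * walking a UPL's page list with the accessors above.
 * "example_report_dirty_pages", "pl" and "page_count" are
 * hypothetical; they would come from the caller's UPL setup.
 */
#if 0
static void
example_report_dirty_pages(upl_page_info_t *pl, int page_count)
{
	int i;

	for (i = 0; i < page_count; i++) {
		if (!upl_valid_page(pl, i))
			continue;
		if (upl_dirty_page(pl, i))
			printf("page %d (phys 0x%x) is dirty\n",
			    i, upl_phys_page(pl, i));
	}
}
#endif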
6012
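/*
 * Debug aid: walk the inactive and zero-fill queues (reported
 * together as "IN Q") and then the active queue ("AC Q"), counting
 * how many pages are dirty, being paged out, or precious.
 */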
6013 void
6014 vm_countdirtypages(void)
6015 {
6016 vm_page_t m;
6017 int dpages;
6018 int pgopages;
6019 int precpages;
6020
6021
6022 dpages = 0;
6023 pgopages = 0;
6024 precpages = 0;
6025 
6026 vm_page_lock_queues();
6027 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6028 do {
6029 if (m == (vm_page_t) 0) break;
6030 
6031 if (m->dirty) dpages++;
6032 if (m->pageout) pgopages++;
6033 if (m->precious) precpages++;
6034 
6035 assert(m->object != kernel_object);
6036 m = (vm_page_t) queue_next(&m->pageq);
6037 if (m == (vm_page_t) 0) break;
6038 
6039 } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
6040 vm_page_unlock_queues();
6041
6042 vm_page_lock_queues();
6043 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6044 do {
6045 if (m == (vm_page_t) 0) break;
6046 
6047 if (m->dirty) dpages++;
6048 if (m->pageout) pgopages++;
6049 if (m->precious) precpages++;
6050 
6051 assert(m->object != kernel_object);
6052 m = (vm_page_t) queue_next(&m->pageq);
6053 if (m == (vm_page_t) 0) break;
6054 
6055 } while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
6056 vm_page_unlock_queues();
6057
6058 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6059
6060 dpages = 0;
6061 pgopages = 0;
6062 precpages = 0;
6063
6064 vm_page_lock_queues();
6065 m = (vm_page_t) queue_first(&vm_page_queue_active);
6066
6067 do {
6068 if (m == (vm_page_t) 0) break;
6069 if (m->dirty) dpages++;
6070 if (m->pageout) pgopages++;
6071 if (m->precious) precpages++;
6072 
6073 assert(m->object != kernel_object);
6074 m = (vm_page_t) queue_next(&m->pageq);
6075 if (m == (vm_page_t) 0) break;
6076 
6077 } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
6078 vm_page_unlock_queues();
6079
6080 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6081
6082 }
6083 #endif /* MACH_BSD */
6084
6085 #ifdef UPL_DEBUG
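/*
 * Debug-only helpers: stash and retrieve two caller-supplied tags in
 * the UPL so that a UPL can be associated with the client that
 * created it when debugging.
 */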
6086 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6087 {
6088 upl->ubc_alias1 = alias1;
6089 upl->ubc_alias2 = alias2;
6090 return KERN_SUCCESS;
6091 }
6092 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6093 {
6094 if (al)
6095 *al = upl->ubc_alias1;
6096 if (al2)
6097 *al2 = upl->ubc_alias2;
6098 return KERN_SUCCESS;
6099 }
6100 #endif /* UPL_DEBUG */
6101
6102
6103
6104 #if MACH_KDB
6105 #include <ddb/db_output.h>
6106 #include <ddb/db_print.h>
6107 #include <vm/vm_print.h>
6108
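/* Route this section's printf() calls to the kernel debugger's output. */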
6109 #define printf kdbprintf
6110 void db_pageout(void);
6111
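/*
 * Kernel-debugger display routine: dump page counts, paging targets,
 * and (via db_pageout) pageout statistics, using db_indent for
 * nested formatting.
 */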
6112 void
6113 db_vm(void)
6114 {
6115
6116 iprintf("VM Statistics:\n");
6117 db_indent += 2;
6118 iprintf("pages:\n");
6119 db_indent += 2;
6120 iprintf("activ %5d inact %5d free %5d",
6121 vm_page_active_count, vm_page_inactive_count,
6122 vm_page_free_count);
6123 printf(" wire %5d gobbl %5d\n",
6124 vm_page_wire_count, vm_page_gobble_count);
6125 db_indent -= 2;
6126 iprintf("target:\n");
6127 db_indent += 2;
6128 iprintf("min %5d inact %5d free %5d",
6129 vm_page_free_min, vm_page_inactive_target,
6130 vm_page_free_target);
6131 printf(" resrv %5d\n", vm_page_free_reserved);
6132 db_indent -= 2;
6133 iprintf("pause:\n");
6134 db_pageout();
6135 db_indent -= 2;
6136 }
6137
6138 #if MACH_COUNTERS
6139 extern int c_laundry_pages_freed;
6140 #endif /* MACH_COUNTERS */
6141
6142 void
6143 db_pageout(void)
6144 {
6145 iprintf("Pageout Statistics:\n");
6146 db_indent += 2;
6147 iprintf("active %5d inactv %5d\n",
6148 vm_pageout_active, vm_pageout_inactive);
6149 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6150 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6151 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6152 iprintf("used %5d clean %5d dirty %5d\n",
6153 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6154 vm_pageout_inactive_dirty);
6155 #if MACH_COUNTERS
6156 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6157 #endif /* MACH_COUNTERS */
6158 #if MACH_CLUSTER_STATS
6159 iprintf("Cluster Statistics:\n");
6160 db_indent += 2;
6161 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6162 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6163 vm_pageout_cluster_collisions);
6164 iprintf("clusters %5d conversions %5d\n",
6165 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6166 db_indent -= 2;
6167 iprintf("Target Statistics:\n");
6168 db_indent += 2;
6169 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6170 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6171 vm_pageout_target_page_freed);
6172 db_indent -= 2;
6173 #endif /* MACH_CLUSTER_STATS */
6174 db_indent -= 2;
6175 }
6176
6177 #endif /* MACH_KDB */