1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 /*
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
29 * All Rights Reserved.
30 *
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
36 *
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 *
41 * Carnegie Mellon requests users of this software to return to
42 *
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
47 *
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
50 */
51 /*
52 */
53 /*
54 * File: vm/vm_pageout.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 * Date: 1985
57 *
58 * The proverbial page-out daemon.
59 */
60
61 #include <stdint.h>
62
63 #include <debug.h>
64 #include <mach_pagemap.h>
65 #include <mach_cluster_stats.h>
66 #include <mach_kdb.h>
67 #include <advisory_pageout.h>
68
69 #include <mach/mach_types.h>
70 #include <mach/memory_object.h>
71 #include <mach/memory_object_default.h>
72 #include <mach/memory_object_control_server.h>
73 #include <mach/mach_host_server.h>
74 #include <mach/upl.h>
75 #include <mach/vm_map.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_statistics.h>
78
79 #include <kern/kern_types.h>
80 #include <kern/counters.h>
81 #include <kern/host_statistics.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/thread.h>
85 #include <kern/xpr.h>
86 #include <kern/kalloc.h>
87
88 #include <machine/vm_tuning.h>
89
90 #include <vm/pmap.h>
91 #include <vm/vm_fault.h>
92 #include <vm/vm_map.h>
93 #include <vm/vm_object.h>
94 #include <vm/vm_page.h>
95 #include <vm/vm_pageout.h>
96 #include <vm/vm_protos.h> /* must be last */
97
98 /*
99 * ENCRYPTED SWAP:
100 */
101 #ifdef __ppc__
102 #include <ppc/mappings.h>
103 #endif /* __ppc__ */
104 #include <../bsd/crypto/aes/aes.h>
105
106 extern ipc_port_t memory_manager_default;
107
108
109 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
110 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000 /* maximum iterations of the active queue to move pages to inactive */
111 #endif
112
113 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
114 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
115 #endif
116
117 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
118 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
119 #endif
120
121 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
122 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
123 #endif
124
125 #ifndef VM_PAGE_LAUNDRY_MAX
126 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
127 #endif /* VM_PAGE_LAUNDRY_MAX */
128
129 #ifndef VM_PAGEOUT_BURST_WAIT
130 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
131 #endif /* VM_PAGEOUT_BURST_WAIT */
132
133 #ifndef VM_PAGEOUT_EMPTY_WAIT
134 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
135 #endif /* VM_PAGEOUT_EMPTY_WAIT */
136
137 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
138 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
139 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
140
141 #ifndef VM_PAGEOUT_IDLE_WAIT
142 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
143 #endif /* VM_PAGEOUT_IDLE_WAIT */
144
145
146 /*
147 * To obtain a reasonable LRU approximation, the inactive queue
148 * needs to be large enough to give pages on it a chance to be
149 * referenced a second time. This macro defines the fraction
150 * of active+inactive pages that should be inactive.
151 * The pageout daemon uses it to update vm_page_inactive_target.
152 *
153 * If vm_page_free_count falls below vm_page_free_target and
154 * vm_page_inactive_count is below vm_page_inactive_target,
155 * then the pageout daemon starts running.
156 */
157
158 #ifndef VM_PAGE_INACTIVE_TARGET
159 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
160 #endif /* VM_PAGE_INACTIVE_TARGET */
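/*
 * For illustration: with 90,000 active+inactive pages,
 * VM_PAGE_INACTIVE_TARGET yields 90,000 / 3 = 30,000, i.e. the
 * daemon tries to keep roughly a third of those pages inactive.
 */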
161
162 /*
163 * Once the pageout daemon starts running, it keeps going
164 * until vm_page_free_count meets or exceeds vm_page_free_target.
165 */
166
167 #ifndef VM_PAGE_FREE_TARGET
168 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
169 #endif /* VM_PAGE_FREE_TARGET */
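/*
 * For illustration: with 8,000 unreserved pages,
 * VM_PAGE_FREE_TARGET yields 15 + 8,000 / 80 = 115 pages.
 */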
170
171 /*
172 * The pageout daemon always starts running once vm_page_free_count
173 * falls below vm_page_free_min.
174 */
175
176 #ifndef VM_PAGE_FREE_MIN
177 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
178 #endif /* VM_PAGE_FREE_MIN */
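/*
 * For illustration: with the same 8,000 unreserved pages,
 * VM_PAGE_FREE_MIN yields 10 + 8,000 / 100 = 90 pages.
 */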
179
180 /*
181 * When vm_page_free_count falls below vm_page_free_reserved,
182 * only vm-privileged threads can allocate pages. vm-privilege
183 * allows the pageout daemon and default pager (and any other
184 * associated threads needed for default pageout) to continue
185 * operation by dipping into the reserved pool of pages.
186 */
187
188 #ifndef VM_PAGE_FREE_RESERVED
189 #define VM_PAGE_FREE_RESERVED(n) \
190 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
191 #endif /* VM_PAGE_FREE_RESERVED */
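/*
 * With the default VM_PAGE_LAUNDRY_MAX of 16, this works out to
 * 96 + n reserved pages for the given n.
 */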
192
193
194 /*
195 * must hold the page queues lock to
196 * manipulate this structure
197 */
198 struct vm_pageout_queue {
199 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
200 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
201 unsigned int pgo_maxlaundry;
202
203 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
204 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
205 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
206 :0;
207 };
208
209 #define VM_PAGE_Q_THROTTLED(q) \
210 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
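/*
 * pgo_laundry counts pages sitting on pgo_pending plus pages still in
 * flight at the pager: vm_pageout_cluster() increments it when a page
 * is queued and vm_pageout_throttle_up() decrements it when the page
 * comes back from laundry.  Once it reaches pgo_maxlaundry the queue
 * is considered throttled and vm_pageout_scan stops feeding it dirty
 * pages until the count drains.
 */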
211
212
213 /*
 214  * Exported variable used to broadcast the activation of the pageout scan.
 215  * The Working Set code uses this to throttle its use of pmap removes. In this
216 * way, code which runs within memory in an uncontested context does
217 * not keep encountering soft faults.
218 */
219
220 unsigned int vm_pageout_scan_event_counter = 0;
221
222 /*
223 * Forward declarations for internal routines.
224 */
225
226 static void vm_pageout_garbage_collect(int);
227 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
228 static void vm_pageout_iothread_external(void);
229 static void vm_pageout_iothread_internal(void);
230 static void vm_pageout_queue_steal(vm_page_t);
231
232 extern void vm_pageout_continue(void);
233 extern void vm_pageout_scan(void);
234
235 unsigned int vm_pageout_reserved_internal = 0;
236 unsigned int vm_pageout_reserved_really = 0;
237
238 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
239 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
240 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
241 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
242 unsigned int vm_pageout_deadlock_relief = 0;
243 unsigned int vm_pageout_inactive_relief = 0;
244 unsigned int vm_pageout_burst_active_throttle = 0;
245 unsigned int vm_pageout_burst_inactive_throttle = 0;
246
247 /*
248 * Protection against zero fill flushing live working sets derived
249 * from existing backing store and files
250 */
251 unsigned int vm_accellerate_zf_pageout_trigger = 400;
252 unsigned int vm_zf_iterator;
253 unsigned int vm_zf_iterator_count = 40;
254 unsigned int last_page_zf;
255 unsigned int vm_zf_count = 0;
256
257 /*
258 * These variables record the pageout daemon's actions:
259 * how many pages it looks at and what happens to those pages.
260 * No locking needed because only one thread modifies the variables.
261 */
262
263 unsigned int vm_pageout_active = 0; /* debugging */
264 unsigned int vm_pageout_inactive = 0; /* debugging */
265 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
266 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
267 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
268 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
269 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
270 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
271 unsigned int vm_pageout_inactive_used = 0; /* debugging */
272 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
273 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
274 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
275 unsigned int vm_pageout_purged_objects = 0; /* debugging */
276 unsigned int vm_stat_discard = 0; /* debugging */
277 unsigned int vm_stat_discard_sent = 0; /* debugging */
278 unsigned int vm_stat_discard_failure = 0; /* debugging */
279 unsigned int vm_stat_discard_throttle = 0; /* debugging */
280
281 unsigned int vm_pageout_scan_active_throttled = 0;
282 unsigned int vm_pageout_scan_inactive_throttled = 0;
283 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
284 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
285 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
286 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
287 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
288 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
289 /*
290 * Backing store throttle when BS is exhausted
291 */
292 unsigned int vm_backing_store_low = 0;
293
294 unsigned int vm_pageout_out_of_line = 0;
295 unsigned int vm_pageout_in_place = 0;
296
297 /*
298 * ENCRYPTED SWAP:
299 * counters and statistics...
300 */
301 unsigned long vm_page_decrypt_counter = 0;
302 unsigned long vm_page_decrypt_for_upl_counter = 0;
303 unsigned long vm_page_encrypt_counter = 0;
304 unsigned long vm_page_encrypt_abort_counter = 0;
305 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
306 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
307
308
309 struct vm_pageout_queue vm_pageout_queue_internal;
310 struct vm_pageout_queue vm_pageout_queue_external;
311
312
313 /*
314 * Routine: vm_backing_store_disable
315 * Purpose:
316 * Suspend non-privileged threads wishing to extend
317 * backing store when we are low on backing store
318 * (Synchronized by caller)
319 */
320 void
321 vm_backing_store_disable(
322 boolean_t disable)
323 {
324 if(disable) {
325 vm_backing_store_low = 1;
326 } else {
327 if(vm_backing_store_low) {
328 vm_backing_store_low = 0;
329 thread_wakeup((event_t) &vm_backing_store_low);
330 }
331 }
332 }
333
334
335 /*
336 * Routine: vm_pageout_object_allocate
337 * Purpose:
338 * Allocate an object for use as out-of-line memory in a
339 * data_return/data_initialize message.
340 * The page must be in an unlocked object.
341 *
342 * If the page belongs to a trusted pager, cleaning in place
343 * will be used, which utilizes a special "pageout object"
344 * containing private alias pages for the real page frames.
345 * Untrusted pagers use normal out-of-line memory.
346 */
347 vm_object_t
348 vm_pageout_object_allocate(
349 vm_page_t m,
350 vm_size_t size,
351 vm_object_offset_t offset)
352 {
353 vm_object_t object = m->object;
354 vm_object_t new_object;
355
356 assert(object->pager_ready);
357
358 new_object = vm_object_allocate(size);
359
360 if (object->pager_trusted) {
361 assert (offset < object->size);
362
363 vm_object_lock(new_object);
364 new_object->pageout = TRUE;
365 new_object->shadow = object;
366 new_object->can_persist = FALSE;
367 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
368 new_object->shadow_offset = offset;
369 vm_object_unlock(new_object);
370
371 /*
372 * Take a paging reference on the object. This will be dropped
373 * in vm_pageout_object_terminate()
374 */
375 vm_object_lock(object);
376 vm_object_paging_begin(object);
377 vm_page_lock_queues();
378 vm_page_unlock_queues();
379 vm_object_unlock(object);
380
381 vm_pageout_in_place++;
382 } else
383 vm_pageout_out_of_line++;
384 return(new_object);
385 }
386
387 #if MACH_CLUSTER_STATS
388 unsigned long vm_pageout_cluster_dirtied = 0;
389 unsigned long vm_pageout_cluster_cleaned = 0;
390 unsigned long vm_pageout_cluster_collisions = 0;
391 unsigned long vm_pageout_cluster_clusters = 0;
392 unsigned long vm_pageout_cluster_conversions = 0;
393 unsigned long vm_pageout_target_collisions = 0;
394 unsigned long vm_pageout_target_page_dirtied = 0;
395 unsigned long vm_pageout_target_page_freed = 0;
396 #define CLUSTER_STAT(clause) clause
397 #else /* MACH_CLUSTER_STATS */
398 #define CLUSTER_STAT(clause)
399 #endif /* MACH_CLUSTER_STATS */
400
401 /*
402 * Routine: vm_pageout_object_terminate
403 * Purpose:
404 * Destroy the pageout_object allocated by
405 * vm_pageout_object_allocate(), and perform all of the
406 * required cleanup actions.
407 *
408 * In/Out conditions:
409 * The object must be locked, and will be returned locked.
410 */
411 void
412 vm_pageout_object_terminate(
413 vm_object_t object)
414 {
415 vm_object_t shadow_object;
416 boolean_t shadow_internal;
417
418 /*
419 * Deal with the deallocation (last reference) of a pageout object
420 * (used for cleaning-in-place) by dropping the paging references/
421 * freeing pages in the original object.
422 */
423
424 assert(object->pageout);
425 shadow_object = object->shadow;
426 vm_object_lock(shadow_object);
427 shadow_internal = shadow_object->internal;
428
429 while (!queue_empty(&object->memq)) {
430 vm_page_t p, m;
431 vm_object_offset_t offset;
432
433 p = (vm_page_t) queue_first(&object->memq);
434
435 assert(p->private);
436 assert(p->pageout);
437 p->pageout = FALSE;
438 assert(!p->cleaning);
439
440 offset = p->offset;
441 VM_PAGE_FREE(p);
442 p = VM_PAGE_NULL;
443
444 m = vm_page_lookup(shadow_object,
445 offset + object->shadow_offset);
446
447 if(m == VM_PAGE_NULL)
448 continue;
449 assert(m->cleaning);
 450                  /* used as a trigger on upl_commit etc. to recognize the */
 451                  /* pageout daemon's subsequent desire to pageout a cleaning */
 452                  /* page. When the bit is on, the upl commit code will */
453 /* respect the pageout bit in the target page over the */
454 /* caller's page list indication */
455 m->dump_cleaning = FALSE;
456
457 /*
458 * Account for the paging reference taken when
459 * m->cleaning was set on this page.
460 */
461 vm_object_paging_end(shadow_object);
462 assert((m->dirty) || (m->precious) ||
463 (m->busy && m->cleaning));
464
465 /*
466 * Handle the trusted pager throttle.
467 * Also decrement the burst throttle (if external).
468 */
469 vm_page_lock_queues();
470 if (m->laundry) {
471 vm_pageout_throttle_up(m);
472 }
473
474 /*
475 * Handle the "target" page(s). These pages are to be freed if
476 * successfully cleaned. Target pages are always busy, and are
477 * wired exactly once. The initial target pages are not mapped,
478 * (so cannot be referenced or modified) but converted target
479 * pages may have been modified between the selection as an
480 * adjacent page and conversion to a target.
481 */
482 if (m->pageout) {
483 assert(m->busy);
484 assert(m->wire_count == 1);
485 m->cleaning = FALSE;
486 m->pageout = FALSE;
487 #if MACH_CLUSTER_STATS
488 if (m->wanted) vm_pageout_target_collisions++;
489 #endif
490 /*
491 * Revoke all access to the page. Since the object is
492 * locked, and the page is busy, this prevents the page
493 * from being dirtied after the pmap_disconnect() call
494 * returns.
495 *
 496                           * Since the page is left "dirty" but "not modified", we
497 * can detect whether the page was redirtied during
498 * pageout by checking the modify state.
499 */
500 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
501 m->dirty = TRUE;
502 else
503 m->dirty = FALSE;
504
505 if (m->dirty) {
506 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
507 vm_page_unwire(m);/* reactivates */
508 VM_STAT(reactivations++);
509 PAGE_WAKEUP_DONE(m);
510 } else {
511 CLUSTER_STAT(vm_pageout_target_page_freed++;)
512 vm_page_free(m);/* clears busy, etc. */
513 }
514 vm_page_unlock_queues();
515 continue;
516 }
517 /*
518 * Handle the "adjacent" pages. These pages were cleaned in
519 * place, and should be left alone.
 520                   * If the page has been referenced, then someone is using the
 521                   * page, so make it active; otherwise deactivate it.
522 */
523 if (!m->active && !m->inactive && !m->private) {
524 if (m->reference)
525 vm_page_activate(m);
526 else
527 vm_page_deactivate(m);
528 }
529 if((m->busy) && (m->cleaning)) {
530
531 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
532 m->busy = FALSE;
533
534 /* We do not re-set m->dirty ! */
535 /* The page was busy so no extraneous activity */
536 /* could have occurred. COPY_INTO is a read into the */
537 /* new pages. CLEAN_IN_PLACE does actually write */
538 /* out the pages but handling outside of this code */
 539                          /* will take care of resetting dirty. We do, however, clear */
 540                          /* the modify bit for the Programmed I/O case. */
541 pmap_clear_modify(m->phys_page);
542 if(m->absent) {
543 m->absent = FALSE;
544 if(shadow_object->absent_count == 1)
545 vm_object_absent_release(shadow_object);
546 else
547 shadow_object->absent_count--;
548 }
549 m->overwriting = FALSE;
550 } else if (m->overwriting) {
551 /* alternate request page list, write to page_list */
552 /* case. Occurs when the original page was wired */
553 /* at the time of the list request */
554 assert(m->wire_count != 0);
555 vm_page_unwire(m);/* reactivates */
556 m->overwriting = FALSE;
557 } else {
558 /*
559 * Set the dirty state according to whether or not the page was
560 * modified during the pageout. Note that we purposefully do
561 * NOT call pmap_clear_modify since the page is still mapped.
 562                           * If the page were to be dirtied between the 2 calls,
563 * this fact would be lost. This code is only necessary to
564 * maintain statistics, since the pmap module is always
565 * consulted if m->dirty is false.
566 */
567 #if MACH_CLUSTER_STATS
568 m->dirty = pmap_is_modified(m->phys_page);
569
570 if (m->dirty) vm_pageout_cluster_dirtied++;
571 else vm_pageout_cluster_cleaned++;
572 if (m->wanted) vm_pageout_cluster_collisions++;
573 #else
574 m->dirty = 0;
575 #endif
576 }
577 m->cleaning = FALSE;
578
579 /*
580 * Wakeup any thread waiting for the page to be un-cleaning.
581 */
582 PAGE_WAKEUP(m);
583 vm_page_unlock_queues();
584 }
585 /*
 586          * Account for the paging reference taken in vm_pageout_object_allocate().
587 */
588 vm_object_paging_end(shadow_object);
589 vm_object_unlock(shadow_object);
590
591 assert(object->ref_count == 0);
592 assert(object->paging_in_progress == 0);
593 assert(object->resident_page_count == 0);
594 return;
595 }
596
597 /*
598 * Routine: vm_pageout_setup
599 * Purpose:
600 * Set up a page for pageout (clean & flush).
601 *
602 * Move the page to a new object, as part of which it will be
603 * sent to its memory manager in a memory_object_data_write or
604 * memory_object_initialize message.
605 *
606 * The "new_object" and "new_offset" arguments
607 * indicate where the page should be moved.
608 *
609 * In/Out conditions:
610 * The page in question must not be on any pageout queues,
611 * and must be busy. The object to which it belongs
612 * must be unlocked, and the caller must hold a paging
613 * reference to it. The new_object must not be locked.
614 *
615 * This routine returns a pointer to a place-holder page,
616 * inserted at the same offset, to block out-of-order
617 * requests for the page. The place-holder page must
618 * be freed after the data_write or initialize message
619 * has been sent.
620 *
621 * The original page is put on a paging queue and marked
622 * not busy on exit.
623 */
624 vm_page_t
625 vm_pageout_setup(
626 register vm_page_t m,
627 register vm_object_t new_object,
628 vm_object_offset_t new_offset)
629 {
630 register vm_object_t old_object = m->object;
631 vm_object_offset_t paging_offset;
632 vm_object_offset_t offset;
633 register vm_page_t holding_page;
634 register vm_page_t new_m;
635 boolean_t need_to_wire = FALSE;
636
637
638 XPR(XPR_VM_PAGEOUT,
639 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
640 (integer_t)m->object, (integer_t)m->offset,
641 (integer_t)m, (integer_t)new_object,
642 (integer_t)new_offset);
643 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
644 !m->restart);
645
646 assert(m->dirty || m->precious);
647
648 /*
649 * Create a place-holder page where the old one was, to prevent
650 * attempted pageins of this page while we're unlocked.
651 */
652 VM_PAGE_GRAB_FICTITIOUS(holding_page);
653
654 vm_object_lock(old_object);
655
656 offset = m->offset;
657 paging_offset = offset + old_object->paging_offset;
658
659 if (old_object->pager_trusted) {
660 /*
661 * This pager is trusted, so we can clean this page
662 * in place. Leave it in the old object, and mark it
663 * cleaning & pageout.
664 */
665 new_m = holding_page;
666 holding_page = VM_PAGE_NULL;
667
668 /*
669 * Set up new page to be private shadow of real page.
670 */
671 new_m->phys_page = m->phys_page;
672 new_m->fictitious = FALSE;
673 new_m->pageout = TRUE;
674
675 /*
676 * Mark real page as cleaning (indicating that we hold a
677 * paging reference to be released via m_o_d_r_c) and
678 * pageout (indicating that the page should be freed
679 * when the pageout completes).
680 */
681 pmap_clear_modify(m->phys_page);
682 vm_page_lock_queues();
683 new_m->private = TRUE;
684 vm_page_wire(new_m);
685 m->cleaning = TRUE;
686 m->pageout = TRUE;
687
688 vm_page_wire(m);
689 assert(m->wire_count == 1);
690 vm_page_unlock_queues();
691
692 m->dirty = TRUE;
693 m->precious = FALSE;
694 m->page_lock = VM_PROT_NONE;
695 m->unusual = FALSE;
696 m->unlock_request = VM_PROT_NONE;
697 } else {
698 /*
699 * Cannot clean in place, so rip the old page out of the
700 * object, and stick the holding page in. Set new_m to the
701 * page in the new object.
702 */
703 vm_page_lock_queues();
704 VM_PAGE_QUEUES_REMOVE(m);
705 vm_page_remove(m);
706
707 vm_page_insert(holding_page, old_object, offset);
708 vm_page_unlock_queues();
709
710 m->dirty = TRUE;
711 m->precious = FALSE;
712 new_m = m;
713 new_m->page_lock = VM_PROT_NONE;
714 new_m->unlock_request = VM_PROT_NONE;
715
716 if (old_object->internal)
717 need_to_wire = TRUE;
718 }
719 /*
720 * Record that this page has been written out
721 */
722 #if MACH_PAGEMAP
723 vm_external_state_set(old_object->existence_map, offset);
724 #endif /* MACH_PAGEMAP */
725
726 vm_object_unlock(old_object);
727
728 vm_object_lock(new_object);
729
730 /*
 731          * Put the page into the new object. If it is not wired
732 * (if it's the real page) it will be activated.
733 */
734
735 vm_page_lock_queues();
736 vm_page_insert(new_m, new_object, new_offset);
737 if (need_to_wire)
738 vm_page_wire(new_m);
739 else
740 vm_page_activate(new_m);
741 PAGE_WAKEUP_DONE(new_m);
742 vm_page_unlock_queues();
743
744 vm_object_unlock(new_object);
745
746 /*
747 * Return the placeholder page to simplify cleanup.
748 */
749 return (holding_page);
750 }
751
752 /*
753 * Routine: vm_pageclean_setup
754 *
755 * Purpose: setup a page to be cleaned (made non-dirty), but not
756 * necessarily flushed from the VM page cache.
757 * This is accomplished by cleaning in place.
758 *
759 * The page must not be busy, and the object and page
760 * queues must be locked.
761 *
762 */
763 void
764 vm_pageclean_setup(
765 vm_page_t m,
766 vm_page_t new_m,
767 vm_object_t new_object,
768 vm_object_offset_t new_offset)
769 {
770 vm_object_t old_object = m->object;
771 assert(!m->busy);
772 assert(!m->cleaning);
773
774 XPR(XPR_VM_PAGEOUT,
775 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
776 (integer_t)old_object, m->offset, (integer_t)m,
777 (integer_t)new_m, new_offset);
778
779 pmap_clear_modify(m->phys_page);
780 vm_object_paging_begin(old_object);
781
782 /*
783 * Record that this page has been written out
784 */
785 #if MACH_PAGEMAP
786 vm_external_state_set(old_object->existence_map, m->offset);
787 #endif /*MACH_PAGEMAP*/
788
789 /*
790 * Mark original page as cleaning in place.
791 */
792 m->cleaning = TRUE;
793 m->dirty = TRUE;
794 m->precious = FALSE;
795
796 /*
797 * Convert the fictitious page to a private shadow of
798 * the real page.
799 */
800 assert(new_m->fictitious);
801 new_m->fictitious = FALSE;
802 new_m->private = TRUE;
803 new_m->pageout = TRUE;
804 new_m->phys_page = m->phys_page;
805 vm_page_wire(new_m);
806
807 vm_page_insert(new_m, new_object, new_offset);
808 assert(!new_m->wanted);
809 new_m->busy = FALSE;
810 }
811
812 void
813 vm_pageclean_copy(
814 vm_page_t m,
815 vm_page_t new_m,
816 vm_object_t new_object,
817 vm_object_offset_t new_offset)
818 {
819 XPR(XPR_VM_PAGEOUT,
820 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
821 m, new_m, new_object, new_offset, 0);
822
823 assert((!m->busy) && (!m->cleaning));
824
825 assert(!new_m->private && !new_m->fictitious);
826
827 pmap_clear_modify(m->phys_page);
828
829 m->busy = TRUE;
830 vm_object_paging_begin(m->object);
831 vm_page_unlock_queues();
832 vm_object_unlock(m->object);
833
834 /*
835 * Copy the original page to the new page.
836 */
837 vm_page_copy(m, new_m);
838
839 /*
840 * Mark the old page as clean. A request to pmap_is_modified
841 * will get the right answer.
842 */
843 vm_object_lock(m->object);
844 m->dirty = FALSE;
845
846 vm_object_paging_end(m->object);
847
848 vm_page_lock_queues();
849 if (!m->active && !m->inactive)
850 vm_page_activate(m);
851 PAGE_WAKEUP_DONE(m);
852
853 vm_page_insert(new_m, new_object, new_offset);
854 vm_page_activate(new_m);
855 new_m->busy = FALSE; /* No other thread can be waiting */
856 }
857
858
859 /*
860 * Routine: vm_pageout_initialize_page
861 * Purpose:
862 * Causes the specified page to be initialized in
863 * the appropriate memory object. This routine is used to push
864 * pages into a copy-object when they are modified in the
865 * permanent object.
866 *
867 * The page is moved to a temporary object and paged out.
868 *
869 * In/out conditions:
870 * The page in question must not be on any pageout queues.
871 * The object to which it belongs must be locked.
872 * The page must be busy, but not hold a paging reference.
873 *
874 * Implementation:
875 * Move this page to a completely new object.
876 */
877 void
878 vm_pageout_initialize_page(
879 vm_page_t m)
880 {
881 vm_object_t object;
882 vm_object_offset_t paging_offset;
883 vm_page_t holding_page;
884
885
886 XPR(XPR_VM_PAGEOUT,
887 "vm_pageout_initialize_page, page 0x%X\n",
888 (integer_t)m, 0, 0, 0, 0);
889 assert(m->busy);
890
891 /*
892 * Verify that we really want to clean this page
893 */
894 assert(!m->absent);
895 assert(!m->error);
896 assert(m->dirty);
897
898 /*
899 * Create a paging reference to let us play with the object.
900 */
901 object = m->object;
902 paging_offset = m->offset + object->paging_offset;
903 vm_object_paging_begin(object);
904 if (m->absent || m->error || m->restart ||
905 (!m->dirty && !m->precious)) {
906 VM_PAGE_FREE(m);
907 panic("reservation without pageout?"); /* alan */
908 vm_object_unlock(object);
909 return;
910 }
911
912 /* set the page for future call to vm_fault_list_request */
913 holding_page = NULL;
914 vm_page_lock_queues();
915 pmap_clear_modify(m->phys_page);
916 m->dirty = TRUE;
917 m->busy = TRUE;
918 m->list_req_pending = TRUE;
919 m->cleaning = TRUE;
920 m->pageout = TRUE;
921 vm_page_wire(m);
922 vm_page_unlock_queues();
923 vm_object_unlock(object);
924
925 /*
926 * Write the data to its pager.
927 * Note that the data is passed by naming the new object,
928 * not a virtual address; the pager interface has been
929 * manipulated to use the "internal memory" data type.
930 * [The object reference from its allocation is donated
931 * to the eventual recipient.]
932 */
933 memory_object_data_initialize(object->pager,
934 paging_offset,
935 PAGE_SIZE);
936
937 vm_object_lock(object);
938 }
939
940 #if MACH_CLUSTER_STATS
941 #define MAXCLUSTERPAGES 16
942 struct {
943 unsigned long pages_in_cluster;
944 unsigned long pages_at_higher_offsets;
945 unsigned long pages_at_lower_offsets;
946 } cluster_stats[MAXCLUSTERPAGES];
947 #endif /* MACH_CLUSTER_STATS */
948
949 boolean_t allow_clustered_pageouts = FALSE;
950
951 /*
952 * vm_pageout_cluster:
953 *
954 * Given a page, queue it to the appropriate I/O thread,
955 * which will page it out and attempt to clean adjacent pages
956 * in the same operation.
957 *
958 * The page must be busy, and the object and queues locked. We will take a
959 * paging reference to prevent deallocation or collapse when we
960 * release the object lock back at the call site. The I/O thread
961 * is responsible for consuming this reference
962 *
963 * The page must not be on any pageout queue.
964 */
965
966 void
967 vm_pageout_cluster(vm_page_t m)
968 {
969 vm_object_t object = m->object;
970 struct vm_pageout_queue *q;
971
972
973 XPR(XPR_VM_PAGEOUT,
974 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
975 (integer_t)object, m->offset, (integer_t)m, 0, 0);
976
977 /*
978 * Only a certain kind of page is appreciated here.
979 */
980 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
981 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
982
983 /*
984 * protect the object from collapse -
985 * locking in the object's paging_offset.
986 */
987 vm_object_paging_begin(object);
988
989 /*
990 * set the page for future call to vm_fault_list_request
991 * page should already be marked busy
992 */
993 vm_page_wire(m);
994 m->list_req_pending = TRUE;
995 m->cleaning = TRUE;
996 m->pageout = TRUE;
997 m->laundry = TRUE;
998
999 if (object->internal == TRUE)
1000 q = &vm_pageout_queue_internal;
1001 else
1002 q = &vm_pageout_queue_external;
1003 q->pgo_laundry++;
1004
1005 m->pageout_queue = TRUE;
1006 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
1007
1008 if (q->pgo_idle == TRUE) {
1009 q->pgo_idle = FALSE;
1010 thread_wakeup((event_t) &q->pgo_pending);
1011 }
1012 }
1013
1014
1015 unsigned long vm_pageout_throttle_up_count = 0;
1016
1017 /*
1018 * A page is back from laundry. See if there are some pages waiting to
1019 * go to laundry and if we can let some of them go now.
1020 *
1021 * Object and page queues must be locked.
1022 */
1023 void
1024 vm_pageout_throttle_up(
1025 vm_page_t m)
1026 {
1027 struct vm_pageout_queue *q;
1028
1029 vm_pageout_throttle_up_count++;
1030
1031 assert(m->laundry);
1032 assert(m->object != VM_OBJECT_NULL);
1033 assert(m->object != kernel_object);
1034
1035 if (m->object->internal == TRUE)
1036 q = &vm_pageout_queue_internal;
1037 else
1038 q = &vm_pageout_queue_external;
1039
1040 m->laundry = FALSE;
1041 q->pgo_laundry--;
1042
1043 if (q->pgo_throttled == TRUE) {
1044 q->pgo_throttled = FALSE;
1045 thread_wakeup((event_t) &q->pgo_laundry);
1046 }
1047 }
1048
1049
1050 /*
1051 * vm_pageout_scan does the dirty work for the pageout daemon.
1052 * It returns with vm_page_queue_free_lock held and
1053 * vm_page_free_wanted == 0.
1054 */
1055
1056 #define DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
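/*
 * vm_pageout_scan() batches work under the page queues lock:
 * delayed_unlock counts pages handled since the lock was last
 * dropped, and once it passes DELAYED_UNLOCK_LIMIT the scan frees any
 * locally collected pages, drops the lock and mutex_pause()s so that
 * other threads contending for the lock can run.
 */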
1057
1058 #define FCS_IDLE 0
1059 #define FCS_DELAYED 1
1060 #define FCS_DEADLOCK_DETECTED 2
1061
1062 struct flow_control {
1063 int state;
1064 mach_timespec_t ts;
1065 };
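/*
 * State machine for throttling against the default pager: the scan
 * starts in FCS_IDLE; when the internal pageout queue is throttled it
 * moves to FCS_DELAYED and records a deadline vm_pageout_deadlock_wait
 * milliseconds out in ts.  If the queue is still throttled when that
 * deadline passes, the state becomes FCS_DEADLOCK_DETECTED and the
 * scan tries to relieve the suspected deadlock by reclaiming
 * vm_pageout_deadlock_target pages that can be freed without going
 * through the throttled queue, before re-arming the timer.
 */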
1066
1067 extern kern_return_t sysclk_gettime(mach_timespec_t *);
1068
1069
1070 void
1071 vm_pageout_scan(void)
1072 {
1073 unsigned int loop_count = 0;
1074 unsigned int inactive_burst_count = 0;
1075 unsigned int active_burst_count = 0;
1076 vm_page_t local_freeq = 0;
1077 int local_freed = 0;
1078 int delayed_unlock = 0;
1079 int need_internal_inactive = 0;
1080 int refmod_state = 0;
1081 int vm_pageout_deadlock_target = 0;
1082 struct vm_pageout_queue *iq;
1083 struct vm_pageout_queue *eq;
1084 struct flow_control flow_control;
1085 boolean_t active_throttled = FALSE;
1086 boolean_t inactive_throttled = FALSE;
1087 mach_timespec_t ts;
1088 unsigned int msecs = 0;
1089 vm_object_t object;
1090
1091
1092 flow_control.state = FCS_IDLE;
1093 iq = &vm_pageout_queue_internal;
1094 eq = &vm_pageout_queue_external;
1095
1096 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1097
1098 /*???*/ /*
1099 * We want to gradually dribble pages from the active queue
1100 * to the inactive queue. If we let the inactive queue get
1101 * very small, and then suddenly dump many pages into it,
1102 * those pages won't get a sufficient chance to be referenced
1103 * before we start taking them from the inactive queue.
1104 *
1105 * We must limit the rate at which we send pages to the pagers.
1106 * data_write messages consume memory, for message buffers and
1107 * for map-copy objects. If we get too far ahead of the pagers,
1108 * we can potentially run out of memory.
1109 *
1110 * We can use the laundry count to limit directly the number
1111 * of pages outstanding to the default pager. A similar
1112 * strategy for external pagers doesn't work, because
1113 * external pagers don't have to deallocate the pages sent them,
1114 * and because we might have to send pages to external pagers
1115 * even if they aren't processing writes. So we also
1116 * use a burst count to limit writes to external pagers.
1117 *
1118 * When memory is very tight, we can't rely on external pagers to
1119 * clean pages. They probably aren't running, because they
1120 * aren't vm-privileged. If we kept sending dirty pages to them,
1121 * we could exhaust the free list.
1122 */
1123 vm_page_lock_queues();
1124 delayed_unlock = 1;
1125
1126
1127 Restart:
1128 /*
1129 * Recalculate vm_page_inactivate_target.
1130 */
1131 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1132 vm_page_inactive_count);
1133 object = NULL;
1134
1135 for (;;) {
1136 vm_page_t m;
1137
1138 if (delayed_unlock == 0)
1139 vm_page_lock_queues();
1140
1141 active_burst_count = vm_page_active_count;
1142
1143 if (active_burst_count > vm_pageout_burst_active_throttle)
1144 active_burst_count = vm_pageout_burst_active_throttle;
1145
1146 /*
1147 * Move pages from active to inactive.
1148 */
1149 while ((need_internal_inactive ||
1150 vm_page_inactive_count < vm_page_inactive_target) &&
1151 !queue_empty(&vm_page_queue_active) &&
1152 ((active_burst_count--) > 0)) {
1153
1154 vm_pageout_active++;
1155
1156 m = (vm_page_t) queue_first(&vm_page_queue_active);
1157
1158 assert(m->active && !m->inactive);
1159 assert(!m->laundry);
1160 assert(m->object != kernel_object);
1161
1162 /*
1163 * Try to lock object; since we've already got the
1164 * page queues lock, we can only 'try' for this one.
1165 * if the 'try' fails, we need to do a mutex_pause
1166 * to allow the owner of the object lock a chance to
1167 * run... otherwise, we're likely to trip over this
1168 * object in the same state as we work our way through
1169 * the queue... clumps of pages associated with the same
1170 * object are fairly typical on the inactive and active queues
1171 */
1172 if (m->object != object) {
1173 if (object != NULL) {
1174 vm_object_unlock(object);
1175 object = NULL;
1176 }
1177 if (!vm_object_lock_try(m->object)) {
1178 /*
1179 * move page to end of active queue and continue
1180 */
1181 queue_remove(&vm_page_queue_active, m,
1182 vm_page_t, pageq);
1183 queue_enter(&vm_page_queue_active, m,
1184 vm_page_t, pageq);
1185
1186 goto done_with_activepage;
1187 }
1188 object = m->object;
1189 }
1190 /*
1191 * if the page is BUSY, then we pull it
1192 * off the active queue and leave it alone.
1193 * when BUSY is cleared, it will get stuck
1194 * back on the appropriate queue
1195 */
1196 if (m->busy) {
1197 queue_remove(&vm_page_queue_active, m,
1198 vm_page_t, pageq);
1199 m->pageq.next = NULL;
1200 m->pageq.prev = NULL;
1201
1202 if (!m->fictitious)
1203 vm_page_active_count--;
1204 m->active = FALSE;
1205
1206 goto done_with_activepage;
1207 }
1208 if (need_internal_inactive) {
1209 /*
1210 * If we're unable to make forward progress
1211 * with the current set of pages on the
1212 * inactive queue due to busy objects or
1213 * throttled pageout queues, then
1214 * move a page that is already clean
1215 * or belongs to a pageout queue that
1216 * isn't currently throttled
1217 */
1218 active_throttled = FALSE;
1219
1220 if (object->internal) {
1221 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1222 active_throttled = TRUE;
1223 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1224 active_throttled = TRUE;
1225 }
1226 if (active_throttled == TRUE) {
1227 if (!m->dirty) {
1228 refmod_state = pmap_get_refmod(m->phys_page);
1229
1230 if (refmod_state & VM_MEM_REFERENCED)
1231 m->reference = TRUE;
1232 if (refmod_state & VM_MEM_MODIFIED)
1233 m->dirty = TRUE;
1234 }
1235 if (m->dirty || m->precious) {
1236 /*
1237 * page is dirty and targets a THROTTLED queue
1238 * so all we can do is move it back to the
1239 * end of the active queue to get it out
1240 * of the way
1241 */
1242 queue_remove(&vm_page_queue_active, m,
1243 vm_page_t, pageq);
1244 queue_enter(&vm_page_queue_active, m,
1245 vm_page_t, pageq);
1246
1247 vm_pageout_scan_active_throttled++;
1248
1249 goto done_with_activepage;
1250 }
1251 }
1252 vm_pageout_scan_active_throttle_success++;
1253 need_internal_inactive--;
1254 }
1255 /*
1256 * Deactivate the page while holding the object
1257 * locked, so we know the page is still not busy.
1258 * This should prevent races between pmap_enter
1259 * and pmap_clear_reference. The page might be
1260 * absent or fictitious, but vm_page_deactivate
1261 * can handle that.
1262 */
1263 vm_page_deactivate(m);
1264 done_with_activepage:
1265 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1266
1267 if (object != NULL) {
1268 vm_object_unlock(object);
1269 object = NULL;
1270 }
1271 if (local_freeq) {
1272 vm_page_free_list(local_freeq);
1273
1274 local_freeq = 0;
1275 local_freed = 0;
1276 }
1277 delayed_unlock = 0;
1278 vm_page_unlock_queues();
1279
1280 mutex_pause();
1281 vm_page_lock_queues();
1282 /*
1283 * continue the while loop processing
1284 * the active queue... need to hold
1285 * the page queues lock
1286 */
1287 continue;
1288 }
1289 }
1290
1291
1292
1293 /**********************************************************************
1294 * above this point we're playing with the active queue
1295 * below this point we're playing with the throttling mechanisms
1296 * and the inactive queue
1297 **********************************************************************/
1298
1299
1300
1301 /*
1302 * We are done if we have met our target *and*
1303 * nobody is still waiting for a page.
1304 */
1305 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1306 if (object != NULL) {
1307 vm_object_unlock(object);
1308 object = NULL;
1309 }
1310 if (local_freeq) {
1311 vm_page_free_list(local_freeq);
1312
1313 local_freeq = 0;
1314 local_freed = 0;
1315 }
1316 mutex_lock(&vm_page_queue_free_lock);
1317
1318 if ((vm_page_free_count >= vm_page_free_target) &&
1319 (vm_page_free_wanted == 0)) {
1320
1321 vm_page_unlock_queues();
1322
1323 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1324 return;
1325 }
1326 mutex_unlock(&vm_page_queue_free_lock);
1327 }
1328
1329
1330 /*
1331 * Sometimes we have to pause:
1332 * 1) No inactive pages - nothing to do.
1333 * 2) Flow control - default pageout queue is full
1334 * 3) Loop control - no acceptable pages found on the inactive queue
1335 * within the last vm_pageout_burst_inactive_throttle iterations
1336 */
1337 if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
1338 vm_pageout_scan_empty_throttle++;
1339 msecs = vm_pageout_empty_wait;
1340 goto vm_pageout_scan_delay;
1341
1342 } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
1343 vm_pageout_scan_burst_throttle++;
1344 msecs = vm_pageout_burst_wait;
1345 goto vm_pageout_scan_delay;
1346
1347 } else if (VM_PAGE_Q_THROTTLED(iq)) {
1348
1349 switch (flow_control.state) {
1350
1351 case FCS_IDLE:
1352 reset_deadlock_timer:
1353 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1354 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1355 sysclk_gettime(&flow_control.ts);
1356 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1357
1358 flow_control.state = FCS_DELAYED;
1359 msecs = vm_pageout_deadlock_wait;
1360
1361 break;
1362
1363 case FCS_DELAYED:
1364 sysclk_gettime(&ts);
1365
1366 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1367 /*
1368 * the pageout thread for the default pager is potentially
1369 * deadlocked since the
1370 * default pager queue has been throttled for more than the
1371 * allowable time... we need to move some clean pages or dirty
1372 * pages belonging to the external pagers if they aren't throttled
1373 * vm_page_free_wanted represents the number of threads currently
1374 * blocked waiting for pages... we'll move one page for each of
1375 * these plus a fixed amount to break the logjam... once we're done
1376                                  * moving this number of pages, we'll re-enter the FCS_DELAYED state
1377 * with a new timeout target since we have no way of knowing
1378 * whether we've broken the deadlock except through observation
1379 * of the queue associated with the default pager... we need to
1380                                  * stop moving pages and allow the system to run to see what
1381 * state it settles into.
1382 */
1383 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
1384 vm_pageout_scan_deadlock_detected++;
1385 flow_control.state = FCS_DEADLOCK_DETECTED;
1386
1387 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1388 goto consider_inactive;
1389 }
1390 /*
1391 * just resniff instead of trying
1392 * to compute a new delay time... we're going to be
1393 * awakened immediately upon a laundry completion,
1394 * so we won't wait any longer than necessary
1395 */
1396 msecs = vm_pageout_idle_wait;
1397 break;
1398
1399 case FCS_DEADLOCK_DETECTED:
1400 if (vm_pageout_deadlock_target)
1401 goto consider_inactive;
1402 goto reset_deadlock_timer;
1403
1404 }
1405 vm_pageout_scan_throttle++;
1406 iq->pgo_throttled = TRUE;
1407 vm_pageout_scan_delay:
1408 if (object != NULL) {
1409 vm_object_unlock(object);
1410 object = NULL;
1411 }
1412 if (local_freeq) {
1413 vm_page_free_list(local_freeq);
1414
1415 local_freeq = 0;
1416 local_freed = 0;
1417 }
1418 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1419
1420 counter(c_vm_pageout_scan_block++);
1421
1422 vm_page_unlock_queues();
1423
1424 thread_block(THREAD_CONTINUE_NULL);
1425
1426 vm_page_lock_queues();
1427 delayed_unlock = 1;
1428
1429 iq->pgo_throttled = FALSE;
1430
1431 if (loop_count >= vm_page_inactive_count) {
1432 if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
1433 /*
1434 * Make sure we move enough "appropriate"
1435 * pages to the inactive queue before trying
1436 * again.
1437 */
1438 need_internal_inactive = vm_pageout_inactive_relief;
1439 }
1440 loop_count = 0;
1441 }
1442 inactive_burst_count = 0;
1443
1444 goto Restart;
1445 /*NOTREACHED*/
1446 }
1447
1448
1449 flow_control.state = FCS_IDLE;
1450 consider_inactive:
1451 loop_count++;
1452 inactive_burst_count++;
1453 vm_pageout_inactive++;
1454
1455 if (!queue_empty(&vm_page_queue_inactive)) {
1456 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1457
1458 if (m->clustered && (m->no_isync == TRUE)) {
1459 goto use_this_page;
1460 }
1461 }
1462 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1463 vm_zf_iterator = 0;
1464 } else {
1465 last_page_zf = 0;
1466 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1467 vm_zf_iterator = 0;
1468 }
1469 }
1470 if (queue_empty(&vm_page_queue_zf) ||
1471 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1472 !queue_empty(&vm_page_queue_inactive))) {
1473 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1474 last_page_zf = 0;
1475 } else {
1476 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1477 last_page_zf = 1;
1478 }
1479 use_this_page:
1480 assert(!m->active && m->inactive);
1481 assert(!m->laundry);
1482 assert(m->object != kernel_object);
1483
1484 /*
1485           * Try to lock object; since we've already got the
1486 * page queues lock, we can only 'try' for this one.
1487 * if the 'try' fails, we need to do a mutex_pause
1488 * to allow the owner of the object lock a chance to
1489 * run... otherwise, we're likely to trip over this
1490 * object in the same state as we work our way through
1491 * the queue... clumps of pages associated with the same
1492 * object are fairly typical on the inactive and active queues
1493 */
1494 if (m->object != object) {
1495 if (object != NULL) {
1496 vm_object_unlock(object);
1497 object = NULL;
1498 }
1499 if (!vm_object_lock_try(m->object)) {
1500 /*
1501 * Move page to end and continue.
1502 * Don't re-issue ticket
1503 */
1504 if (m->zero_fill) {
1505 queue_remove(&vm_page_queue_zf, m,
1506 vm_page_t, pageq);
1507 queue_enter(&vm_page_queue_zf, m,
1508 vm_page_t, pageq);
1509 } else {
1510 queue_remove(&vm_page_queue_inactive, m,
1511 vm_page_t, pageq);
1512 queue_enter(&vm_page_queue_inactive, m,
1513 vm_page_t, pageq);
1514 }
1515 vm_pageout_inactive_nolock++;
1516
1517 /*
1518 * force us to dump any collected free pages
1519 * and to pause before moving on
1520 */
1521 delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;
1522
1523 goto done_with_inactivepage;
1524 }
1525 object = m->object;
1526 }
1527 /*
1528 * If the page belongs to a purgable object with no pending copies
1529 * against it, then we reap all of the pages in the object
1530 * and note that the object has been "emptied". It'll be up to the
1531           * application to discover this and recreate its contents if desired.
1532 */
1533 if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
1534 object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
1535 object->copy == VM_OBJECT_NULL) {
1536
1537 (void) vm_object_purge(object);
1538 vm_pageout_purged_objects++;
1539 /*
1540 * we've just taken all of the pages from this object,
1541 * so drop the lock now since we're not going to find
1542 * any more pages belonging to it anytime soon
1543 */
1544 vm_object_unlock(object);
1545 object = NULL;
1546
1547 inactive_burst_count = 0;
1548
1549 goto done_with_inactivepage;
1550 }
1551
1552 /*
1553 * Paging out pages of external objects which
1554 * are currently being created must be avoided.
1555           * The pager may claim memory, thus leading to a
1556           * possible deadlock between it and the pageout thread,
1557           * if such pages are finally chosen. The remaining assumption
1558           * is that there will eventually be enough available pages in the
1559           * inactive pool to page out in order to satisfy all memory
1560 * claimed by the thread which concurrently creates the pager.
1561 */
1562 if (!object->pager_initialized && object->pager_created) {
1563 /*
1564 * Move page to end and continue, hoping that
1565 * there will be enough other inactive pages to
1566 * page out so that the thread which currently
1567 * initializes the pager will succeed.
1568 * Don't re-grant the ticket, the page should
1569                   * be pulled from the queue and paged out whenever
1570 * one of its logically adjacent fellows is
1571 * targeted.
1572 */
1573 if (m->zero_fill) {
1574 queue_remove(&vm_page_queue_zf, m,
1575 vm_page_t, pageq);
1576 queue_enter(&vm_page_queue_zf, m,
1577 vm_page_t, pageq);
1578 last_page_zf = 1;
1579 vm_zf_iterator = vm_zf_iterator_count - 1;
1580 } else {
1581 queue_remove(&vm_page_queue_inactive, m,
1582 vm_page_t, pageq);
1583 queue_enter(&vm_page_queue_inactive, m,
1584 vm_page_t, pageq);
1585 last_page_zf = 0;
1586 vm_zf_iterator = 1;
1587 }
1588 vm_pageout_inactive_avoid++;
1589
1590 goto done_with_inactivepage;
1591 }
1592 /*
1593 * Remove the page from the inactive list.
1594 */
1595 if (m->zero_fill) {
1596 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1597 } else {
1598 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1599 }
1600 m->pageq.next = NULL;
1601 m->pageq.prev = NULL;
1602 m->inactive = FALSE;
1603 if (!m->fictitious)
1604 vm_page_inactive_count--;
1605
1606 if (m->busy || !object->alive) {
1607 /*
1608 * Somebody is already playing with this page.
1609 * Leave it off the pageout queues.
1610 */
1611 vm_pageout_inactive_busy++;
1612
1613 goto done_with_inactivepage;
1614 }
1615
1616 /*
1617 * If it's absent or in error, we can reclaim the page.
1618 */
1619
1620 if (m->absent || m->error) {
1621 vm_pageout_inactive_absent++;
1622 reclaim_page:
1623 if (vm_pageout_deadlock_target) {
1624 vm_pageout_scan_inactive_throttle_success++;
1625 vm_pageout_deadlock_target--;
1626 }
1627 if (m->tabled)
1628 vm_page_remove(m); /* clears tabled, object, offset */
1629 if (m->absent)
1630 vm_object_absent_release(object);
1631
1632 assert(m->pageq.next == NULL &&
1633 m->pageq.prev == NULL);
1634 m->pageq.next = (queue_entry_t)local_freeq;
1635 local_freeq = m;
1636 local_freed++;
1637
1638 inactive_burst_count = 0;
1639
1640 goto done_with_inactivepage;
1641 }
1642
1643 assert(!m->private);
1644 assert(!m->fictitious);
1645
1646 /*
1647 * If already cleaning this page in place, convert from
1648 * "adjacent" to "target". We can leave the page mapped,
1649 * and vm_pageout_object_terminate will determine whether
1650 * to free or reactivate.
1651 */
1652
1653 if (m->cleaning) {
1654 m->busy = TRUE;
1655 m->pageout = TRUE;
1656 m->dump_cleaning = TRUE;
1657 vm_page_wire(m);
1658
1659 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1660
1661 inactive_burst_count = 0;
1662
1663 goto done_with_inactivepage;
1664 }
1665
1666 /*
1667 * If it's being used, reactivate.
1668 * (Fictitious pages are either busy or absent.)
1669 */
1670 if ( (!m->reference) ) {
1671 refmod_state = pmap_get_refmod(m->phys_page);
1672
1673 if (refmod_state & VM_MEM_REFERENCED)
1674 m->reference = TRUE;
1675 if (refmod_state & VM_MEM_MODIFIED)
1676 m->dirty = TRUE;
1677 }
1678 if (m->reference) {
1679 was_referenced:
1680 vm_page_activate(m);
1681 VM_STAT(reactivations++);
1682
1683 vm_pageout_inactive_used++;
1684 last_page_zf = 0;
1685 inactive_burst_count = 0;
1686
1687 goto done_with_inactivepage;
1688 }
1689
1690 XPR(XPR_VM_PAGEOUT,
1691 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1692 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1693
1694 /*
1695 * we've got a candidate page to steal...
1696 *
1697 * m->dirty is up to date courtesy of the
1698 * preceding check for m->reference... if
1699 * we get here, then m->reference had to be
1700 * FALSE which means we did a pmap_get_refmod
1701 * and updated both m->reference and m->dirty
1702 *
1703 * if it's dirty or precious we need to
1704           * see if the target queue is throttled;
1705           * if it is, we need to skip over it by moving it back
1706 * to the end of the inactive queue
1707 */
1708 inactive_throttled = FALSE;
1709
1710 if (m->dirty || m->precious) {
1711 if (object->internal) {
1712 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1713 inactive_throttled = TRUE;
1714 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1715 inactive_throttled = TRUE;
1716 }
1717 }
1718 if (inactive_throttled == TRUE) {
1719 if (m->zero_fill) {
1720 queue_enter(&vm_page_queue_zf, m,
1721 vm_page_t, pageq);
1722 } else {
1723 queue_enter(&vm_page_queue_inactive, m,
1724 vm_page_t, pageq);
1725 }
1726 if (!m->fictitious)
1727 vm_page_inactive_count++;
1728 m->inactive = TRUE;
1729
1730 vm_pageout_scan_inactive_throttled++;
1731
1732 goto done_with_inactivepage;
1733 }
1734 /*
1735 * we've got a page that we can steal...
1736 * eliminate all mappings and make sure
1737 * we have the up-to-date modified state
1738 * first take the page BUSY, so that no new
1739 * mappings can be made
1740 */
1741 m->busy = TRUE;
1742
1743 /*
1744 * if we need to do a pmap_disconnect then we
1745 * need to re-evaluate m->dirty since the pmap_disconnect
1746 * provides the true state atomically... the
1747 * page was still mapped up to the pmap_disconnect
1748 * and may have been dirtied at the last microsecond
1749 *
1750 * we also check for the page being referenced 'late'
1751 * if it was, we first need to do a WAKEUP_DONE on it
1752 * since we already set m->busy = TRUE, before
1753 * going off to reactivate it
1754 *
1755 * if we don't need the pmap_disconnect, then
1756 * m->dirty is up to date courtesy of the
1757 * earlier check for m->reference... if
1758 * we get here, then m->reference had to be
1759 * FALSE which means we did a pmap_get_refmod
1760 * and updated both m->reference and m->dirty...
1761 */
1762 if (m->no_isync == FALSE) {
1763 refmod_state = pmap_disconnect(m->phys_page);
1764
1765 if (refmod_state & VM_MEM_MODIFIED)
1766 m->dirty = TRUE;
1767 if (refmod_state & VM_MEM_REFERENCED) {
1768 m->reference = TRUE;
1769
1770 PAGE_WAKEUP_DONE(m);
1771 goto was_referenced;
1772 }
1773 }
1774 /*
1775 * If it's clean and not precious, we can free the page.
1776 */
1777 if (!m->dirty && !m->precious) {
1778 vm_pageout_inactive_clean++;
1779 goto reclaim_page;
1780 }
1781 vm_pageout_cluster(m);
1782
1783 vm_pageout_inactive_dirty++;
1784
1785 inactive_burst_count = 0;
1786
1787 done_with_inactivepage:
1788 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1789
1790 if (object != NULL) {
1791 vm_object_unlock(object);
1792 object = NULL;
1793 }
1794 if (local_freeq) {
1795 vm_page_free_list(local_freeq);
1796
1797 local_freeq = 0;
1798 local_freed = 0;
1799 }
1800 delayed_unlock = 0;
1801 vm_page_unlock_queues();
1802 mutex_pause();
1803 }
1804 /*
1805 * back to top of pageout scan loop
1806 */
1807 }
1808 }
1809
1810
1811 int vm_page_free_count_init;
1812
1813 void
1814 vm_page_free_reserve(
1815 int pages)
1816 {
1817 int free_after_reserve;
1818
1819 vm_page_free_reserved += pages;
1820
1821 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1822
1823 vm_page_free_min = vm_page_free_reserved +
1824 VM_PAGE_FREE_MIN(free_after_reserve);
1825
1826 vm_page_free_target = vm_page_free_reserved +
1827 VM_PAGE_FREE_TARGET(free_after_reserve);
1828
1829 if (vm_page_free_target < vm_page_free_min + 5)
1830 vm_page_free_target = vm_page_free_min + 5;
1831 }
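/*
 * For illustration: with vm_page_free_count_init at 100,000 pages and
 * a cumulative reserve of 100 pages, free_after_reserve is 99,900, so
 * vm_page_free_min becomes 100 + (10 + 99,900/100) = 1,109 and
 * vm_page_free_target becomes 100 + (15 + 99,900/80) = 1,363.
 */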
1832
1833 /*
1834 * vm_pageout is the high level pageout daemon.
1835 */
1836
1837 void
1838 vm_pageout_continue(void)
1839 {
1840 vm_pageout_scan_event_counter++;
1841 vm_pageout_scan();
1842 /* we hold vm_page_queue_free_lock now */
1843 assert(vm_page_free_wanted == 0);
1844 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1845 mutex_unlock(&vm_page_queue_free_lock);
1846
1847 counter(c_vm_pageout_block++);
1848 thread_block((thread_continue_t)vm_pageout_continue);
1849 /*NOTREACHED*/
1850 }
1851
1852
1853 /*
1854 * must be called with the
1855 * queues and object locks held
1856 */
1857 static void
1858 vm_pageout_queue_steal(vm_page_t m)
1859 {
1860 struct vm_pageout_queue *q;
1861
1862 if (m->object->internal == TRUE)
1863 q = &vm_pageout_queue_internal;
1864 else
1865 q = &vm_pageout_queue_external;
1866
1867 m->laundry = FALSE;
1868 m->pageout_queue = FALSE;
1869 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
1870
1871 m->pageq.next = NULL;
1872 m->pageq.prev = NULL;
1873
1874 vm_object_paging_end(m->object);
1875
1876 q->pgo_laundry--;
1877 }
1878
1879
1880 #ifdef FAKE_DEADLOCK
1881
1882 #define FAKE_COUNT 5000
1883
1884 int internal_count = 0;
1885 int fake_deadlock = 0;
1886
1887 #endif
1888
1889 static void
1890 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
1891 {
1892 vm_page_t m = NULL;
1893 vm_object_t object;
1894 boolean_t need_wakeup;
1895
1896 vm_page_lock_queues();
1897
1898 while ( !queue_empty(&q->pgo_pending) ) {
1899
1900 q->pgo_busy = TRUE;
1901 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
1902 m->pageout_queue = FALSE;
1903 vm_page_unlock_queues();
1904
1905 m->pageq.next = NULL;
1906 m->pageq.prev = NULL;
1907 #ifdef FAKE_DEADLOCK
1908 if (q == &vm_pageout_queue_internal) {
1909 vm_offset_t addr;
1910 int pg_count;
1911
1912 internal_count++;
1913
1914 if ((internal_count == FAKE_COUNT)) {
1915
1916 pg_count = vm_page_free_count + vm_page_free_reserved;
1917
1918 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
1919 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
1920 }
1921 internal_count = 0;
1922 fake_deadlock++;
1923 }
1924 }
1925 #endif
1926 object = m->object;
1927
1928 if (!object->pager_initialized) {
1929 vm_object_lock(object);
1930
1931 /*
1932 * If there is no memory object for the page, create
1933 * one and hand it to the default pager.
1934 */
1935
1936 if (!object->pager_initialized)
1937 vm_object_collapse(object, (vm_object_offset_t)0);
1938 if (!object->pager_initialized)
1939 vm_object_pager_create(object);
1940 if (!object->pager_initialized) {
1941 /*
1942 * Still no pager for the object.
1943 * Reactivate the page.
1944 *
1945 * Should only happen if there is no
1946 * default pager.
1947 */
1948 m->list_req_pending = FALSE;
1949 m->cleaning = FALSE;
1950 m->pageout = FALSE;
1951 vm_page_unwire(m);
1952
1953 vm_pageout_throttle_up(m);
1954
1955 vm_page_lock_queues();
1956 vm_pageout_dirty_no_pager++;
1957 vm_page_activate(m);
1958 vm_page_unlock_queues();
1959
1960 /*
1961 * And we are done with it.
1962 */
1963 PAGE_WAKEUP_DONE(m);
1964
1965 vm_object_paging_end(object);
1966 vm_object_unlock(object);
1967
1968 vm_page_lock_queues();
1969 continue;
1970 } else if (object->pager == MEMORY_OBJECT_NULL) {
1971 /*
1972 * This pager has been destroyed by either
1973 * memory_object_destroy or vm_object_destroy, and
1974 * so there is nowhere for the page to go.
1975 * Just free the page... VM_PAGE_FREE takes
1976 * care of cleaning up all the state...
1977 * including doing the vm_pageout_throttle_up
1978 */
1979 VM_PAGE_FREE(m);
1980
1981 vm_object_paging_end(object);
1982 vm_object_unlock(object);
1983
1984 vm_page_lock_queues();
1985 continue;
1986 }
1987 vm_object_unlock(object);
1988 }
1989 /*
1990 * we expect the paging_in_progress reference to have
1991 * already been taken on the object before it was added
1992 * to the appropriate pageout I/O queue... this will
1993 * keep the object from being terminated and/or the
1994 * paging_offset from changing until the I/O has
1995 * completed... therefore no need to lock the object to
1996 * pull the paging_offset from it.
1997 *
1998 * Send the data to the pager.
1999 * any pageout clustering happens there
2000 */
2001 memory_object_data_return(object->pager,
2002 m->offset + object->paging_offset,
2003 PAGE_SIZE,
2004 NULL,
2005 NULL,
2006 FALSE,
2007 FALSE,
2008 0);
2009
2010 vm_object_lock(object);
2011 vm_object_paging_end(object);
2012 vm_object_unlock(object);
2013
2014 vm_page_lock_queues();
2015 }
2016 assert_wait((event_t) q, THREAD_UNINT);
2017
2018
2019 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2020 q->pgo_throttled = FALSE;
2021 need_wakeup = TRUE;
2022 } else
2023 need_wakeup = FALSE;
2024
2025 q->pgo_busy = FALSE;
2026 q->pgo_idle = TRUE;
2027 vm_page_unlock_queues();
2028
2029 if (need_wakeup == TRUE)
2030 thread_wakeup((event_t) &q->pgo_laundry);
2031
2032 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2033 /*NOTREACHED*/
2034 }
2035
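/*
 * vm_pageout_iothread_continue() drains its pageout queue and, once the
 * queue is no longer throttled, wakes any thread that throttled itself
 * waiting for room (q->pgo_throttled / q->pgo_laundry).  A minimal
 * user-space analogue of that throttled producer/consumer hand-shake,
 * with assumed names and a pthread condition variable (a sketch only):
 */
#if 0 /* illustrative sketch only */
#include <pthread.h>

struct sketch_queue {
	pthread_mutex_t	lock;
	pthread_cond_t	laundry_cv;	/* producers sleep here when throttled */
	int		laundry;	/* work items still in flight */
	int		max_laundry;	/* throttle threshold */
	int		throttled;	/* a producer is waiting for room */
};

static void
sketch_consumer_done_with_item(struct sketch_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->laundry--;
	if (q->throttled && q->laundry < q->max_laundry) {
		q->throttled = 0;
		pthread_cond_signal(&q->laundry_cv);	/* like thread_wakeup(&q->pgo_laundry) */
	}
	pthread_mutex_unlock(&q->lock);
}

static void
sketch_producer_throttle(struct sketch_queue *q)
{
	pthread_mutex_lock(&q->lock);
	while (q->laundry >= q->max_laundry) {		/* like VM_PAGE_Q_THROTTLED(q) */
		q->throttled = 1;
		pthread_cond_wait(&q->laundry_cv, &q->lock);
	}
	q->laundry++;					/* claim a slot */
	pthread_mutex_unlock(&q->lock);
}
#endif
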
2036
2037 static void
2038 vm_pageout_iothread_external(void)
2039 {
2040
2041 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2042 /*NOTREACHED*/
2043 }
2044
2045
2046 static void
2047 vm_pageout_iothread_internal(void)
2048 {
2049 thread_t self = current_thread();
2050
2051 self->options |= TH_OPT_VMPRIV;
2052
2053 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2054 /*NOTREACHED*/
2055 }
2056
2057 static void
2058 vm_pageout_garbage_collect(int collect)
2059 {
2060 if (collect) {
2061 stack_collect();
2062
2063 /*
2064 * consider_zone_gc should be last, because the other operations
2065 * might return memory to zones.
2066 */
2067 consider_machine_collect();
2068 consider_zone_gc();
2069
2070 consider_machine_adjust();
2071 }
2072
2073 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2074
2075 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2076 /*NOTREACHED*/
2077 }
2078
2079
2080
2081 void
2082 vm_pageout(void)
2083 {
2084 thread_t self = current_thread();
2085 thread_t thread;
2086 kern_return_t result;
2087 spl_t s;
2088
2089 /*
2090 * Set thread privileges.
2091 */
2092 s = splsched();
2093 thread_lock(self);
2094 self->priority = BASEPRI_PREEMPT - 1;
2095 set_sched_pri(self, self->priority);
2096 thread_unlock(self);
2097 splx(s);
2098
2099 /*
2100 * Initialize some paging parameters.
2101 */
2102
2103 if (vm_pageout_idle_wait == 0)
2104 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2105
2106 if (vm_pageout_burst_wait == 0)
2107 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2108
2109 if (vm_pageout_empty_wait == 0)
2110 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2111
2112 if (vm_pageout_deadlock_wait == 0)
2113 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2114
2115 if (vm_pageout_deadlock_relief == 0)
2116 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2117
2118 if (vm_pageout_inactive_relief == 0)
2119 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2120
2121 if (vm_pageout_burst_active_throttle == 0)
2122 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2123
2124 if (vm_pageout_burst_inactive_throttle == 0)
2125 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2126
2127 /*
2128 * Set kernel task to low backing store privileged
2129 * status
2130 */
2131 task_lock(kernel_task);
2132 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2133 task_unlock(kernel_task);
2134
2135 vm_page_free_count_init = vm_page_free_count;
2136 vm_zf_iterator = 0;
2137 /*
2138 * even if we've already called vm_page_free_reserve
2139 * call it again here to ensure that the targets are
2140 * accurately calculated (it uses vm_page_free_count_init)
2141 * calling it with an arg of 0 will not change the reserve
2142 * but will re-calculate free_min and free_target
2143 */
2144 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2145 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2146 } else
2147 vm_page_free_reserve(0);
2148
2149
2150 queue_init(&vm_pageout_queue_external.pgo_pending);
2151 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2152 vm_pageout_queue_external.pgo_laundry = 0;
2153 vm_pageout_queue_external.pgo_idle = FALSE;
2154 vm_pageout_queue_external.pgo_busy = FALSE;
2155 vm_pageout_queue_external.pgo_throttled = FALSE;
2156
2157 queue_init(&vm_pageout_queue_internal.pgo_pending);
2158 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2159 vm_pageout_queue_internal.pgo_laundry = 0;
2160 vm_pageout_queue_internal.pgo_idle = FALSE;
2161 vm_pageout_queue_internal.pgo_busy = FALSE;
2162 vm_pageout_queue_internal.pgo_throttled = FALSE;
2163
2164
2165 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
2166 if (result != KERN_SUCCESS)
2167 panic("vm_pageout_iothread_internal: create failed");
2168
2169 thread_deallocate(thread);
2170
2171
2172 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
2173 if (result != KERN_SUCCESS)
2174 panic("vm_pageout_iothread_external: create failed");
2175
2176 thread_deallocate(thread);
2177
2178
2179 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
2180 if (result != KERN_SUCCESS)
2181 panic("vm_pageout_garbage_collect: create failed");
2182
2183 thread_deallocate(thread);
2184
2185
2186 vm_pageout_continue();
2187 /*NOTREACHED*/
2188 }
2189
2190
2191 static upl_t
2192 upl_create(
2193 int flags,
2194 upl_size_t size)
2195 {
2196 upl_t upl;
2197 int page_field_size; /* bit field in word size buf */
2198
2199 page_field_size = 0;
2200 if (flags & UPL_CREATE_LITE) {
2201 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2202 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2203 }
2204 if(flags & UPL_CREATE_INTERNAL) {
2205 upl = (upl_t)kalloc(sizeof(struct upl)
2206 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2207 + page_field_size);
2208 } else {
2209 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
2210 }
2211 upl->flags = 0;
2212 upl->src_object = NULL;
2213 upl->kaddr = (vm_offset_t)0;
2214 upl->size = 0;
2215 upl->map_object = NULL;
2216 upl->ref_count = 1;
2217 upl_lock_init(upl);
2218 #ifdef UPL_DEBUG
2219 upl->ubc_alias1 = 0;
2220 upl->ubc_alias2 = 0;
2221 #endif /* UPL_DEBUG */
2222 return(upl);
2223 }
2224
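/*
 * The size arithmetic in upl_create() above packs three things into one
 * kalloc'd buffer for an internal lite UPL: the upl struct, the
 * upl_page_info array, and a bitmap ("lite list") with one bit per page,
 * rounded up to whole 32-bit words.  The standalone sketch below (not
 * part of the kernel build) reproduces that arithmetic with an assumed
 * 4K page size, and shows the word/bit indexing used later when the lite
 * list is populated (pg_num >> 5, 1 << (pg_num & 31)).
 */
#if 0 /* illustrative sketch only */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096			/* assumption for the example */

static size_t
sketch_lite_bitmap_bytes(size_t upl_bytes)
{
	size_t nbits = upl_bytes / SKETCH_PAGE_SIZE;	/* one bit per page */
	size_t nbytes = (nbits + 7) >> 3;		/* round up to bytes */
	return (nbytes + 3) & ~(size_t)3;		/* round up to 32-bit words */
}

static void
sketch_lite_bit_set(uint32_t *lite_list, unsigned int pg_num)
{
	lite_list[pg_num >> 5] |= 1u << (pg_num & 31);	/* same indexing as UPL_SET_LITE */
}

int
main(void)
{
	size_t size = 13 * SKETCH_PAGE_SIZE;		/* a 13-page UPL */
	printf("bitmap bytes = %zu\n", sketch_lite_bitmap_bytes(size));	/* prints 4 */
	return 0;
}
#endif
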
2225 static void
2226 upl_destroy(
2227 upl_t upl)
2228 {
2229 int page_field_size; /* bit field in word size buf */
2230
2231 #ifdef UPL_DEBUG
2232 {
2233 upl_t upl_ele;
2234 vm_object_t object;
2235 if (upl->map_object->pageout) {
2236 object = upl->map_object->shadow;
2237 } else {
2238 object = upl->map_object;
2239 }
2240 vm_object_lock(object);
2241 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
2242 if(upl_ele == upl) {
2243 queue_remove(&object->uplq,
2244 upl_ele, upl_t, uplq);
2245 break;
2246 }
2247 }
2248 vm_object_unlock(object);
2249 }
2250 #endif /* UPL_DEBUG */
2251 /* drop a reference on the map_object whether or */
2252 /* not a pageout object is inserted */
2253 if(upl->map_object->pageout)
2254 vm_object_deallocate(upl->map_object);
2255
2256 page_field_size = 0;
2257 if (upl->flags & UPL_LITE) {
2258 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2259 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2260 }
2261 if(upl->flags & UPL_INTERNAL) {
2262 kfree(upl,
2263 sizeof(struct upl) +
2264 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2265 + page_field_size);
2266 } else {
2267 kfree(upl, sizeof(struct upl) + page_field_size);
2268 }
2269 }
2270
2271 void uc_upl_dealloc(upl_t upl);
2272 __private_extern__ void
2273 uc_upl_dealloc(
2274 upl_t upl)
2275 {
2276 upl->ref_count -= 1;
2277 if(upl->ref_count == 0) {
2278 upl_destroy(upl);
2279 }
2280 }
2281
2282 void
2283 upl_deallocate(
2284 upl_t upl)
2285 {
2286
2287 upl->ref_count -= 1;
2288 if(upl->ref_count == 0) {
2289 upl_destroy(upl);
2290 }
2291 }
2292
2293 /*
2294 * Statistics about UPL enforcement of copy-on-write obligations.
2295 */
2296 unsigned long upl_cow = 0;
2297 unsigned long upl_cow_again = 0;
2298 unsigned long upl_cow_contiguous = 0;
2299 unsigned long upl_cow_pages = 0;
2300 unsigned long upl_cow_again_pages = 0;
2301 unsigned long upl_cow_contiguous_pages = 0;
2302
2303 /*
2304 * Routine: vm_object_upl_request
2305 * Purpose:
2306 * Cause the population of a portion of a vm_object.
2307 * Depending on the nature of the request, the pages
2308 * returned may contain valid data or be uninitialized.
2309 * A page list structure, listing the physical pages
2310 * will be returned upon request.
2311 * This function is called by the file system or any other
2312 * supplier of backing store to a pager.
2313 * IMPORTANT NOTE: The caller must still respect the relationship
2314 * between the vm_object and its backing memory object. The
2315 * caller MUST NOT substitute changes in the backing file
2316 * without first doing a memory_object_lock_request on the
2317 * target range unless it is known that the pages are not
2318 * shared with another entity at the pager level.
2319 * Copy_in_to:
2320 * if a page list structure is present
2321 * return the mapped physical pages, where a
2322 * page is not present, return a non-initialized
2323 * one. If the no_sync bit is turned on, don't
2324 * call the pager unlock to synchronize with other
2325 * possible copies of the page. Leave pages busy
2326 * in the original object, if a page list structure
2327 * was specified. When a commit of the page list
2328 * pages is done, the dirty bit will be set for each one.
2329 * Copy_out_from:
2330 * If a page list structure is present, return
2331 * all mapped pages. Where a page does not exist
2332 * map a zero filled one. Leave pages busy in
2333 * the original object. If a page list structure
2334 * is not specified, this call is a no-op.
2335 *
2336 * Note: access of default pager objects has a rather interesting
2337 * twist. The caller of this routine, presumably the file system
2338 * page cache handling code, will never actually make a request
2339 * against a default pager backed object. Only the default
2340 * pager will make requests on backing store related vm_objects.
2341 * In this way the default pager can maintain the relationship
2342 * between backing store files (abstract memory objects) and
2343 * the vm_objects (cache objects) they support.
2344 *
2345 */
2346
2347 __private_extern__ kern_return_t
2348 vm_object_upl_request(
2349 vm_object_t object,
2350 vm_object_offset_t offset,
2351 upl_size_t size,
2352 upl_t *upl_ptr,
2353 upl_page_info_array_t user_page_list,
2354 unsigned int *page_list_count,
2355 int cntrl_flags)
2356 {
2357 vm_page_t dst_page = VM_PAGE_NULL;
2358 vm_object_offset_t dst_offset = offset;
2359 upl_size_t xfer_size = size;
2360 boolean_t do_m_lock = FALSE;
2361 boolean_t dirty;
2362 boolean_t hw_dirty;
2363 upl_t upl = NULL;
2364 unsigned int entry;
2365 #if MACH_CLUSTER_STATS
2366 boolean_t encountered_lrp = FALSE;
2367 #endif
2368 vm_page_t alias_page = NULL;
2369 int page_ticket;
2370 int refmod_state;
2371 wpl_array_t lite_list = NULL;
2372 vm_object_t last_copy_object;
2373
2374
2375 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2376 /*
2377 * For forward compatibility's sake,
2378 * reject any unknown flag.
2379 */
2380 return KERN_INVALID_VALUE;
2381 }
2382
2383 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2384 >> UPL_PAGE_TICKET_SHIFT;
2385
2386 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2387 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2388 }
2389
2390 if(cntrl_flags & UPL_SET_INTERNAL)
2391 if(page_list_count != NULL)
2392 *page_list_count = MAX_UPL_TRANSFER;
2393
2394 if((!object->internal) && (object->paging_offset != 0))
2395 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2396
2397 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2398 return KERN_SUCCESS;
2399 }
2400
2401 vm_object_lock(object);
2402 vm_object_paging_begin(object);
2403 vm_object_unlock(object);
2404
2405 if(upl_ptr) {
2406 if(cntrl_flags & UPL_SET_INTERNAL) {
2407 if(cntrl_flags & UPL_SET_LITE) {
2408 uintptr_t page_field_size;
2409 upl = upl_create(
2410 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2411 size);
2412 user_page_list = (upl_page_info_t *)
2413 (((uintptr_t)upl) + sizeof(struct upl));
2414 lite_list = (wpl_array_t)
2415 (((uintptr_t)user_page_list) +
2416 ((size/PAGE_SIZE) *
2417 sizeof(upl_page_info_t)));
2418 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2419 page_field_size =
2420 (page_field_size + 3) & 0xFFFFFFFC;
2421 bzero((char *)lite_list, page_field_size);
2422 upl->flags =
2423 UPL_LITE | UPL_INTERNAL;
2424 } else {
2425 upl = upl_create(UPL_CREATE_INTERNAL, size);
2426 user_page_list = (upl_page_info_t *)
2427 (((uintptr_t)upl) + sizeof(struct upl));
2428 upl->flags = UPL_INTERNAL;
2429 }
2430 } else {
2431 if(cntrl_flags & UPL_SET_LITE) {
2432 uintptr_t page_field_size;
2433 upl = upl_create(UPL_CREATE_LITE, size);
2434 lite_list = (wpl_array_t)
2435 (((uintptr_t)upl) + sizeof(struct upl));
2436 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2437 page_field_size =
2438 (page_field_size + 3) & 0xFFFFFFFC;
2439 bzero((char *)lite_list, page_field_size);
2440 upl->flags = UPL_LITE;
2441 } else {
2442 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2443 upl->flags = 0;
2444 }
2445 }
2446
2447 if (object->phys_contiguous) {
2448 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2449 object->copy != VM_OBJECT_NULL) {
2450 /* Honor copy-on-write obligations */
2451
2452 /*
2453 * XXX FBDP
2454 * We could still have a race...
2455 * A is here building the UPL for a write().
2456 * A pushes the pages to the current copy
2457 * object.
2458 * A returns the UPL to the caller.
2459 * B comes along and establishes another
2460 * private mapping on this object, inserting
2461 * a new copy object between the original
2462 * object and the old copy object.
2463 * B reads a page and gets the original contents
2464 * from the original object.
2465 * A modifies the page in the original object.
2466 * B reads the page again and sees A's changes,
2467 * which is wrong...
2468 *
2469 * The problem is that the pages are not
2470 * marked "busy" in the original object, so
2471 * nothing prevents B from reading the page
2472 * before A's changes are completed.
2473 *
2474 * The "paging_in_progress" might protect us
2475 * from the insertion of a new copy object
2476 * though... To be verified.
2477 */
2478 vm_object_lock_request(object,
2479 offset,
2480 size,
2481 FALSE,
2482 MEMORY_OBJECT_COPY_SYNC,
2483 VM_PROT_NO_CHANGE);
2484 upl_cow_contiguous++;
2485 upl_cow_contiguous_pages += size >> PAGE_SHIFT;
2486 }
2487
2488 upl->map_object = object;
2489 /* don't need any shadow mappings for this one */
2490 /* since it is already I/O memory */
2491 upl->flags |= UPL_DEVICE_MEMORY;
2492
2493
2494 /* paging_in_progress protects paging_offset */
2495 upl->offset = offset + object->paging_offset;
2496 upl->size = size;
2497 *upl_ptr = upl;
2498 if(user_page_list) {
2499 user_page_list[0].phys_addr =
2500 (offset + object->shadow_offset)>>PAGE_SHIFT;
2501 user_page_list[0].device = TRUE;
2502 }
2503
2504 if(page_list_count != NULL) {
2505 if (upl->flags & UPL_INTERNAL) {
2506 *page_list_count = 0;
2507 } else {
2508 *page_list_count = 1;
2509 }
2510 }
2511
2512 return KERN_SUCCESS;
2513 }
2514
2515 if(user_page_list)
2516 user_page_list[0].device = FALSE;
2517
2518 if(cntrl_flags & UPL_SET_LITE) {
2519 upl->map_object = object;
2520 } else {
2521 upl->map_object = vm_object_allocate(size);
2522 /*
2523 * No need to lock the new object: nobody else knows
2524 * about it yet, so it's all ours so far.
2525 */
2526 upl->map_object->shadow = object;
2527 upl->map_object->pageout = TRUE;
2528 upl->map_object->can_persist = FALSE;
2529 upl->map_object->copy_strategy =
2530 MEMORY_OBJECT_COPY_NONE;
2531 upl->map_object->shadow_offset = offset;
2532 upl->map_object->wimg_bits = object->wimg_bits;
2533 }
2534
2535 }
2536 if (!(cntrl_flags & UPL_SET_LITE)) {
2537 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2538 }
2539
2540 /*
2541 * ENCRYPTED SWAP:
2542 * Just mark the UPL as "encrypted" here.
2543 * We'll actually encrypt the pages later,
2544 * in upl_encrypt(), when the caller has
2545 * selected which pages need to go to swap.
2546 */
2547 if (cntrl_flags & UPL_ENCRYPT) {
2548 upl->flags |= UPL_ENCRYPTED;
2549 }
2550 if (cntrl_flags & UPL_FOR_PAGEOUT) {
2551 upl->flags |= UPL_PAGEOUT;
2552 }
2553 vm_object_lock(object);
2554
2555 /* we can lock in the paging_offset once paging_in_progress is set */
2556 if(upl_ptr) {
2557 upl->size = size;
2558 upl->offset = offset + object->paging_offset;
2559 *upl_ptr = upl;
2560 #ifdef UPL_DEBUG
2561 queue_enter(&object->uplq, upl, upl_t, uplq);
2562 #endif /* UPL_DEBUG */
2563 }
2564
2565 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2566 object->copy != VM_OBJECT_NULL) {
2567 /* Honor copy-on-write obligations */
2568
2569 /*
2570 * The caller is gathering these pages and
2571 * might modify their contents. We need to
2572 * make sure that the copy object has its own
2573 * private copies of these pages before we let
2574 * the caller modify them.
2575 */
2576 vm_object_update(object,
2577 offset,
2578 size,
2579 NULL,
2580 NULL,
2581 FALSE, /* should_return */
2582 MEMORY_OBJECT_COPY_SYNC,
2583 VM_PROT_NO_CHANGE);
2584 upl_cow++;
2585 upl_cow_pages += size >> PAGE_SHIFT;
2586
2587 }
2588 /* remember which copy object we synchronized with */
2589 last_copy_object = object->copy;
2590
2591 entry = 0;
2592 if(cntrl_flags & UPL_COPYOUT_FROM) {
2593 upl->flags |= UPL_PAGE_SYNC_DONE;
2594
2595 while (xfer_size) {
2596 if((alias_page == NULL) &&
2597 !(cntrl_flags & UPL_SET_LITE)) {
2598 vm_object_unlock(object);
2599 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2600 vm_object_lock(object);
2601 }
2602 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2603 dst_page->fictitious ||
2604 dst_page->absent ||
2605 dst_page->error ||
2606 (dst_page->wire_count && !dst_page->pageout) ||
2607
2608 ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2609 (dst_page->page_ticket != page_ticket) &&
2610 ((dst_page->page_ticket+1) != page_ticket)) ) {
2611
2612 if (user_page_list)
2613 user_page_list[entry].phys_addr = 0;
2614 } else {
2615 /*
2616 * grab this up front...
2617 * a high percentage of the time we're going to
2618 * need the hardware modification state a bit later
2619 * anyway... so we can eliminate an extra call into
2620 * the pmap layer by grabbing it here and recording it
2621 */
2622 refmod_state = pmap_get_refmod(dst_page->phys_page);
2623
2624 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2625 /*
2626 * we're only asking for DIRTY pages to be returned
2627 */
2628
2629 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2630 /*
2631 * if this is the page stolen by vm_pageout_scan to be
2632 * cleaned (as opposed to a buddy being clustered in),
2633 * or this request is not being driven by a PAGEOUT cluster,
2634 * then we only need to check for the page being dirty or
2635 * precious to decide whether to return it
2636 */
2637 if (dst_page->dirty || dst_page->precious ||
2638 (refmod_state & VM_MEM_MODIFIED)) {
2639 goto check_busy;
2640 }
2641 }
2642 /*
2643 * this is a request for a PAGEOUT cluster and this page
2644 * is merely along for the ride as a 'buddy'... not only
2645 * does it have to be dirty to be returned, but it also
2646 * can't have been referenced recently... note that we've
2647 * already filtered above based on whether this page is
2648 * currently on the inactive queue or it meets the page
2649 * ticket (generation count) check
2650 */
2651 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2652 ((refmod_state & VM_MEM_MODIFIED) ||
2653 dst_page->dirty || dst_page->precious) ) {
2654 goto check_busy;
2655 }
2656 /*
2657 * if we reach here, we're not to return
2658 * the page... go on to the next one
2659 */
2660 if (user_page_list)
2661 user_page_list[entry].phys_addr = 0;
2662 entry++;
2663 dst_offset += PAGE_SIZE_64;
2664 xfer_size -= PAGE_SIZE;
2665 continue;
2666 }
2667 check_busy:
2668 if(dst_page->busy &&
2669 (!(dst_page->list_req_pending &&
2670 dst_page->pageout))) {
2671 if(cntrl_flags & UPL_NOBLOCK) {
2672 if(user_page_list) {
2673 user_page_list[entry].phys_addr = 0;
2674 }
2675 entry++;
2676 dst_offset += PAGE_SIZE_64;
2677 xfer_size -= PAGE_SIZE;
2678 continue;
2679 }
2680 /*
2681 * someone else is playing with the
2682 * page. We will have to wait.
2683 */
2684 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2685 continue;
2686 }
2687 /* Someone else already cleaning the page? */
2688 if((dst_page->cleaning || dst_page->absent ||
2689 dst_page->wire_count != 0) &&
2690 !dst_page->list_req_pending) {
2691 if(user_page_list) {
2692 user_page_list[entry].phys_addr = 0;
2693 }
2694 entry++;
2695 dst_offset += PAGE_SIZE_64;
2696 xfer_size -= PAGE_SIZE;
2697 continue;
2698 }
2699 /* eliminate all mappings from the */
2700 /* original object and its progeny */
2701
2702 vm_page_lock_queues();
2703
2704 if (dst_page->pageout_queue == TRUE)
2705 /*
2706 * we've buddied up a page for a clustered pageout
2707 * that has already been moved to the pageout
2708 * queue by pageout_scan... we need to remove
2709 * it from the queue and drop the laundry count
2710 * on that queue
2711 */
2712 vm_pageout_queue_steal(dst_page);
2713 #if MACH_CLUSTER_STATS
2714 /* pageout statistics gathering. count */
2715 /* all the pages we will page out that */
2716 /* were not counted in the initial */
2717 /* vm_pageout_scan work */
2718 if(dst_page->list_req_pending)
2719 encountered_lrp = TRUE;
2720 if((dst_page->dirty ||
2721 (dst_page->object->internal &&
2722 dst_page->precious)) &&
2723 (dst_page->list_req_pending
2724 == FALSE)) {
2725 if(encountered_lrp) {
2726 CLUSTER_STAT
2727 (pages_at_higher_offsets++;)
2728 } else {
2729 CLUSTER_STAT
2730 (pages_at_lower_offsets++;)
2731 }
2732 }
2733 #endif
2734 /* Turn off busy indication on pending */
2735 /* pageout. Note: we can only get here */
2736 /* in the request pending case. */
2737 dst_page->list_req_pending = FALSE;
2738 dst_page->busy = FALSE;
2739 dst_page->cleaning = FALSE;
2740
2741 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2742 dirty = hw_dirty ? TRUE : dst_page->dirty;
2743
2744 if(cntrl_flags & UPL_SET_LITE) {
2745 int pg_num;
2746 pg_num = (dst_offset-offset)/PAGE_SIZE;
2747 lite_list[pg_num>>5] |=
2748 1 << (pg_num & 31);
2749 if (hw_dirty)
2750 pmap_clear_modify(dst_page->phys_page);
2751 /*
2752 * Record that this page has been
2753 * written out
2754 */
2755 #if MACH_PAGEMAP
2756 vm_external_state_set(
2757 object->existence_map,
2758 dst_page->offset);
2759 #endif /*MACH_PAGEMAP*/
2760
2761 /*
2762 * Mark original page as cleaning
2763 * in place.
2764 */
2765 dst_page->cleaning = TRUE;
2766 dst_page->dirty = TRUE;
2767 dst_page->precious = FALSE;
2768 } else {
2769 /* use pageclean setup, it is more */
2770 /* convenient even for the pageout */
2771 /* cases here */
2772
2773 vm_object_lock(upl->map_object);
2774 vm_pageclean_setup(dst_page,
2775 alias_page, upl->map_object,
2776 size - xfer_size);
2777 vm_object_unlock(upl->map_object);
2778
2779 alias_page->absent = FALSE;
2780 alias_page = NULL;
2781 }
2782
2783 if(!dirty) {
2784 dst_page->dirty = FALSE;
2785 dst_page->precious = TRUE;
2786 }
2787
2788 if(dst_page->pageout)
2789 dst_page->busy = TRUE;
2790
2791 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2792 /*
2793 * ENCRYPTED SWAP:
2794 * We want to deny access to the target page
2795 * because its contents are about to be
2796 * encrypted and the user would be very
2797 * confused to see encrypted data instead
2798 * of their data.
2799 */
2800 dst_page->busy = TRUE;
2801 }
2802 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2803 /*
2804 * deny access to the target page
2805 * while it is being worked on
2806 */
2807 if ((!dst_page->pageout) &&
2808 (dst_page->wire_count == 0)) {
2809 dst_page->busy = TRUE;
2810 dst_page->pageout = TRUE;
2811 vm_page_wire(dst_page);
2812 }
2813 }
2814
2815 if(user_page_list) {
2816 user_page_list[entry].phys_addr
2817 = dst_page->phys_page;
2818 user_page_list[entry].dirty =
2819 dst_page->dirty;
2820 user_page_list[entry].pageout =
2821 dst_page->pageout;
2822 user_page_list[entry].absent =
2823 dst_page->absent;
2824 user_page_list[entry].precious =
2825 dst_page->precious;
2826 }
2827 vm_page_unlock_queues();
2828
2829 /*
2830 * ENCRYPTED SWAP:
2831 * The caller is gathering this page and might
2832 * access its contents later on. Decrypt the
2833 * page before adding it to the UPL, so that
2834 * the caller never sees encrypted data.
2835 */
2836 if (! (cntrl_flags & UPL_ENCRYPT) &&
2837 dst_page->encrypted) {
2838 assert(dst_page->busy);
2839
2840 vm_page_decrypt(dst_page, 0);
2841 vm_page_decrypt_for_upl_counter++;
2842
2843 /*
2844 * Retry this page, since anything
2845 * could have changed while we were
2846 * decrypting.
2847 */
2848 continue;
2849 }
2850 }
2851 entry++;
2852 dst_offset += PAGE_SIZE_64;
2853 xfer_size -= PAGE_SIZE;
2854 }
2855 } else {
2856 while (xfer_size) {
2857 if((alias_page == NULL) &&
2858 !(cntrl_flags & UPL_SET_LITE)) {
2859 vm_object_unlock(object);
2860 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2861 vm_object_lock(object);
2862 }
2863
2864 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2865 object->copy != last_copy_object) {
2866 /* Honor copy-on-write obligations */
2867
2868 /*
2869 * The copy object has changed since we
2870 * last synchronized for copy-on-write.
2871 * Another copy object might have been
2872 * inserted while we released the object's
2873 * lock. Since someone could have seen the
2874 * original contents of the remaining pages
2875 * through that new object, we have to
2876 * synchronize with it again for the remaining
2877 * pages only. The previous pages are "busy"
2878 * so they can not be seen through the new
2879 * mapping. The new mapping will see our
2880 * upcoming changes for those previous pages,
2881 * but that's OK since they couldn't see what
2882 * was there before. It's just a race anyway
2883 * and there's no guarantee of consistency or
2884 * atomicity. We just don't want new mappings
2885 * to see both the *before* and *after* pages.
2886 */
2887 if (object->copy != VM_OBJECT_NULL) {
2888 vm_object_update(
2889 object,
2890 dst_offset,/* current offset */
2891 xfer_size, /* remaining size */
2892 NULL,
2893 NULL,
2894 FALSE, /* should_return */
2895 MEMORY_OBJECT_COPY_SYNC,
2896 VM_PROT_NO_CHANGE);
2897 upl_cow_again++;
2898 upl_cow_again_pages +=
2899 xfer_size >> PAGE_SHIFT;
2900 }
2901 /* remember the copy object we synced with */
2902 last_copy_object = object->copy;
2903 }
2904
2905 dst_page = vm_page_lookup(object, dst_offset);
2906
2907 if(dst_page != VM_PAGE_NULL) {
2908 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2909 !((dst_page->list_req_pending)
2910 && (dst_page->absent))) {
2911 /* we are doing extended range */
2912 /* requests. we want to grab */
2913 /* pages around some which are */
2914 /* already present. */
2915 if(user_page_list) {
2916 user_page_list[entry].phys_addr = 0;
2917 }
2918 entry++;
2919 dst_offset += PAGE_SIZE_64;
2920 xfer_size -= PAGE_SIZE;
2921 continue;
2922 }
2923 if((dst_page->cleaning) &&
2924 !(dst_page->list_req_pending)) {
2925 /*someone else is writing to the */
2926 /* page. We will have to wait. */
2927 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
2928 continue;
2929 }
2930 if ((dst_page->fictitious &&
2931 dst_page->list_req_pending)) {
2932 /* dump the fictitious page */
2933 dst_page->list_req_pending = FALSE;
2934 dst_page->clustered = FALSE;
2935
2936 vm_page_lock_queues();
2937 vm_page_free(dst_page);
2938 vm_page_unlock_queues();
2939
2940 dst_page = NULL;
2941 } else if ((dst_page->absent &&
2942 dst_page->list_req_pending)) {
2943 /* the default_pager case */
2944 dst_page->list_req_pending = FALSE;
2945 dst_page->busy = FALSE;
2946 }
2947 }
2948 if(dst_page == VM_PAGE_NULL) {
2949 if(object->private) {
2950 /*
2951 * This is a nasty wrinkle for users
2952 * of upl who encounter device or
2953 * private memory; however, it is
2954 * unavoidable. Only a fault can
2955 * resolve the actual backing
2956 * physical page by asking the
2957 * backing device.
2958 */
2959 if(user_page_list) {
2960 user_page_list[entry].phys_addr = 0;
2961 }
2962 entry++;
2963 dst_offset += PAGE_SIZE_64;
2964 xfer_size -= PAGE_SIZE;
2965 continue;
2966 }
2967 /* need to allocate a page */
2968 dst_page = vm_page_alloc(object, dst_offset);
2969 if (dst_page == VM_PAGE_NULL) {
2970 vm_object_unlock(object);
2971 VM_PAGE_WAIT();
2972 vm_object_lock(object);
2973 continue;
2974 }
2975 dst_page->busy = FALSE;
2976 #if 0
2977 if(cntrl_flags & UPL_NO_SYNC) {
2978 dst_page->page_lock = 0;
2979 dst_page->unlock_request = 0;
2980 }
2981 #endif
2982 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
2983 /*
2984 * if UPL_RET_ONLY_ABSENT was specified,
2985 * then we're definitely setting up a
2986 * UPL for a clustered read/pagein
2987 * operation... mark the pages as clustered
2988 * so vm_fault can correctly attribute them
2989 * to the 'pagein' bucket the first time
2990 * a fault happens on them
2991 */
2992 dst_page->clustered = TRUE;
2993 }
2994 dst_page->absent = TRUE;
2995 object->absent_count++;
2996 }
2997 #if 1
2998 if(cntrl_flags & UPL_NO_SYNC) {
2999 dst_page->page_lock = 0;
3000 dst_page->unlock_request = 0;
3001 }
3002 #endif /* 1 */
3003
3004 /*
3005 * ENCRYPTED SWAP:
3006 */
3007 if (cntrl_flags & UPL_ENCRYPT) {
3008 /*
3009 * The page is going to be encrypted when we
3010 * get it from the pager, so mark it so.
3011 */
3012 dst_page->encrypted = TRUE;
3013 } else {
3014 /*
3015 * Otherwise, the page will not contain
3016 * encrypted data.
3017 */
3018 dst_page->encrypted = FALSE;
3019 }
3020
3021 dst_page->overwriting = TRUE;
3022 if(dst_page->fictitious) {
3023 panic("need corner case for fictitious page");
3024 }
3025 if(dst_page->page_lock) {
3026 do_m_lock = TRUE;
3027 }
3028 if(upl_ptr) {
3029
3030 /* eliminate all mappings from the */
3031 /* original object and its progeny */
3032
3033 if(dst_page->busy) {
3034 /*someone else is playing with the */
3035 /* page. We will have to wait. */
3036 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3037 continue;
3038 }
3039 vm_page_lock_queues();
3040
3041 if( !(cntrl_flags & UPL_FILE_IO))
3042 hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
3043 else
3044 hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
3045 dirty = hw_dirty ? TRUE : dst_page->dirty;
3046
3047 if(cntrl_flags & UPL_SET_LITE) {
3048 int pg_num;
3049 pg_num = (dst_offset-offset)/PAGE_SIZE;
3050 lite_list[pg_num>>5] |=
3051 1 << (pg_num & 31);
3052 if (hw_dirty)
3053 pmap_clear_modify(dst_page->phys_page);
3054 /*
3055 * Record that this page has been
3056 * written out
3057 */
3058 #if MACH_PAGEMAP
3059 vm_external_state_set(
3060 object->existence_map,
3061 dst_page->offset);
3062 #endif /*MACH_PAGEMAP*/
3063
3064 /*
3065 * Mark original page as cleaning
3066 * in place.
3067 */
3068 dst_page->cleaning = TRUE;
3069 dst_page->dirty = TRUE;
3070 dst_page->precious = FALSE;
3071 } else {
3072 /* use pageclean setup, it is more */
3073 /* convenient even for the pageout */
3074 /* cases here */
3075 vm_object_lock(upl->map_object);
3076 vm_pageclean_setup(dst_page,
3077 alias_page, upl->map_object,
3078 size - xfer_size);
3079 vm_object_unlock(upl->map_object);
3080
3081 alias_page->absent = FALSE;
3082 alias_page = NULL;
3083 }
3084
3085 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
3086 /* clean in place for read implies */
3087 /* that a write will be done on all */
3088 /* the pages that are dirty before */
3089 /* a upl commit is done. The caller */
3090 /* is obligated to preserve the */
3091 /* contents of all pages marked */
3092 /* dirty. */
3093 upl->flags |= UPL_CLEAR_DIRTY;
3094 }
3095
3096 if(!dirty) {
3097 dst_page->dirty = FALSE;
3098 dst_page->precious = TRUE;
3099 }
3100
3101 if (dst_page->wire_count == 0) {
3102 /* deny access to the target page while */
3103 /* it is being worked on */
3104 dst_page->busy = TRUE;
3105 } else {
3106 vm_page_wire(dst_page);
3107 }
3108 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
3109 /*
3110 * expect the page not to be used
3111 * since it's coming in as part
3112 * of a cluster and could be
3113 * speculative... pages that
3114 * are 'consumed' will get a
3115 * hardware reference
3116 */
3117 dst_page->reference = FALSE;
3118 } else {
3119 /*
3120 * expect the page to be used
3121 */
3122 dst_page->reference = TRUE;
3123 }
3124 dst_page->precious =
3125 (cntrl_flags & UPL_PRECIOUS)
3126 ? TRUE : FALSE;
3127 if(user_page_list) {
3128 user_page_list[entry].phys_addr
3129 = dst_page->phys_page;
3130 user_page_list[entry].dirty =
3131 dst_page->dirty;
3132 user_page_list[entry].pageout =
3133 dst_page->pageout;
3134 user_page_list[entry].absent =
3135 dst_page->absent;
3136 user_page_list[entry].precious =
3137 dst_page->precious;
3138 }
3139 vm_page_unlock_queues();
3140 }
3141 entry++;
3142 dst_offset += PAGE_SIZE_64;
3143 xfer_size -= PAGE_SIZE;
3144 }
3145 }
3146
3147 if (upl->flags & UPL_INTERNAL) {
3148 if(page_list_count != NULL)
3149 *page_list_count = 0;
3150 } else if (*page_list_count > entry) {
3151 if(page_list_count != NULL)
3152 *page_list_count = entry;
3153 }
3154
3155 if(alias_page != NULL) {
3156 vm_page_lock_queues();
3157 vm_page_free(alias_page);
3158 vm_page_unlock_queues();
3159 }
3160
3161 if(do_m_lock) {
3162 vm_prot_t access_required;
3163 /* call back all associated pages from other users of the pager */
3164 /* all future updates will be on data which is based on the */
3165 /* changes we are going to make here. Note: it is assumed that */
3166 /* we already hold copies of the data so we will not be seeing */
3167 /* an avalanche of incoming data from the pager */
3168 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
3169 ? VM_PROT_READ : VM_PROT_WRITE;
3170 while (TRUE) {
3171 kern_return_t rc;
3172
3173 if(!object->pager_ready) {
3174 wait_result_t wait_result;
3175
3176 wait_result = vm_object_sleep(object,
3177 VM_OBJECT_EVENT_PAGER_READY,
3178 THREAD_UNINT);
3179 if (wait_result != THREAD_AWAKENED) {
3180 vm_object_unlock(object);
3181 return KERN_FAILURE;
3182 }
3183 continue;
3184 }
3185
3186 vm_object_unlock(object);
3187 rc = memory_object_data_unlock(
3188 object->pager,
3189 dst_offset + object->paging_offset,
3190 size,
3191 access_required);
3192 if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
3193 return KERN_FAILURE;
3194 vm_object_lock(object);
3195
3196 if (rc == KERN_SUCCESS)
3197 break;
3198 }
3199
3200 /* let's wait on the last page requested */
3201 /* NOTE: we will have to update lock completed routine to signal */
3202 if(dst_page != VM_PAGE_NULL &&
3203 (access_required & dst_page->page_lock) != access_required) {
3204 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
3205 vm_object_unlock(object);
3206 thread_block(THREAD_CONTINUE_NULL);
3207 return KERN_SUCCESS;
3208 }
3209 }
3210
3211 vm_object_unlock(object);
3212 return KERN_SUCCESS;
3213 }
3214
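/*
 * A hypothetical in-kernel caller of vm_object_upl_request() might gather
 * the dirty pages of a range like this.  This is only a sketch of the
 * calling convention visible above (internal lite UPL, COPYOUT_FROM
 * direction, dirty pages only); the name sketch_gather_dirty and the flag
 * combination are assumptions, the object and range are supplied by the
 * caller, and the usual VM headers are assumed to be in scope.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
sketch_gather_dirty(vm_object_t object, vm_object_offset_t offset,
                    upl_size_t size)
{
	upl_t			upl;
	upl_page_info_t		*pl;
	unsigned int		page_list_count = 0;
	mach_msg_type_number_t	count;
	boolean_t		empty;
	kern_return_t		kr;

	kr = vm_object_upl_request(object, offset, size,
				   &upl, NULL, &page_list_count,
				   UPL_SET_INTERNAL | UPL_SET_LITE |
				   UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY);
	if (kr != KERN_SUCCESS)
		return kr;

	/* the page list lives inside an internal UPL */
	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	count = size / PAGE_SIZE;

	/* ... hand the gathered pages to the pager or device here ... */

	kr = upl_commit_range(upl, 0, size, 0, pl, count, &empty);
	upl_deallocate(upl);
	return kr;
}
#endif
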
3215 /* JMM - Backward compatibility for now */
3216 kern_return_t
3217 vm_fault_list_request( /* forward */
3218 memory_object_control_t control,
3219 vm_object_offset_t offset,
3220 upl_size_t size,
3221 upl_t *upl_ptr,
3222 upl_page_info_t **user_page_list_ptr,
3223 int page_list_count,
3224 int cntrl_flags);
3225 kern_return_t
3226 vm_fault_list_request(
3227 memory_object_control_t control,
3228 vm_object_offset_t offset,
3229 upl_size_t size,
3230 upl_t *upl_ptr,
3231 upl_page_info_t **user_page_list_ptr,
3232 int page_list_count,
3233 int cntrl_flags)
3234 {
3235 int local_list_count;
3236 upl_page_info_t *user_page_list;
3237 kern_return_t kr;
3238
3239 if (user_page_list_ptr != NULL) {
3240 local_list_count = page_list_count;
3241 user_page_list = *user_page_list_ptr;
3242 } else {
3243 local_list_count = 0;
3244 user_page_list = NULL;
3245 }
3246 kr = memory_object_upl_request(control,
3247 offset,
3248 size,
3249 upl_ptr,
3250 user_page_list,
3251 &local_list_count,
3252 cntrl_flags);
3253
3254 if(kr != KERN_SUCCESS)
3255 return kr;
3256
3257 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3258 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3259 }
3260
3261 return KERN_SUCCESS;
3262 }
3263
3264
3265
3266 /*
3267 * Routine: vm_object_super_upl_request
3268 * Purpose:
3269 * Cause the population of a portion of a vm_object
3270 * in much the same way as memory_object_upl_request.
3271 * Depending on the nature of the request, the pages
3272 * returned may contain valid data or be uninitialized.
3273 * However, the region may be expanded up to the super
3274 * cluster size provided.
3275 */
3276
3277 __private_extern__ kern_return_t
3278 vm_object_super_upl_request(
3279 vm_object_t object,
3280 vm_object_offset_t offset,
3281 upl_size_t size,
3282 upl_size_t super_cluster,
3283 upl_t *upl,
3284 upl_page_info_t *user_page_list,
3285 unsigned int *page_list_count,
3286 int cntrl_flags)
3287 {
3288 vm_page_t target_page;
3289 int ticket;
3290
3291
3292 if(object->paging_offset > offset)
3293 return KERN_FAILURE;
3294
3295 assert(object->paging_in_progress);
3296 offset = offset - object->paging_offset;
3297
3298 if(cntrl_flags & UPL_FOR_PAGEOUT) {
3299
3300 vm_object_lock(object);
3301
3302 if((target_page = vm_page_lookup(object, offset))
3303 != VM_PAGE_NULL) {
3304 ticket = target_page->page_ticket;
3305 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
3306 cntrl_flags = cntrl_flags |
3307 ((ticket << UPL_PAGE_TICKET_SHIFT)
3308 & UPL_PAGE_TICKET_MASK);
3309 }
3310 vm_object_unlock(object);
3311 }
3312
3313 if (super_cluster > size) {
3314
3315 vm_object_offset_t base_offset;
3316 upl_size_t super_size;
3317
3318 base_offset = (offset &
3319 ~((vm_object_offset_t) super_cluster - 1));
3320 super_size = (offset+size) > (base_offset + super_cluster) ?
3321 super_cluster<<1 : super_cluster;
3322 super_size = ((base_offset + super_size) > object->size) ?
3323 (object->size - base_offset) : super_size;
3324 if(offset > (base_offset + super_size))
3325 panic("vm_object_super_upl_request: Missed target pageout"
3326 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3327 offset, base_offset, super_size, super_cluster,
3328 size, object->paging_offset);
3329 /*
3330 * apparently there is a case where the vm requests a
3331 * page to be written out whose offset is beyond the
3332 * object size
3333 */
3334 if((offset + size) > (base_offset + super_size))
3335 super_size = (offset + size) - base_offset;
3336
3337 offset = base_offset;
3338 size = super_size;
3339 }
3340 return vm_object_upl_request(object, offset, size,
3341 upl, user_page_list, page_list_count,
3342 cntrl_flags);
3343 }
3344
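/*
 * The super-cluster expansion above rounds the requested range out to an
 * aligned "super cluster": base_offset is the request rounded down to a
 * super_cluster boundary, the size doubles if the original request spills
 * past the first cluster, and the result is clipped to the object size
 * (but never shrunk below the original request).  A standalone sketch of
 * that arithmetic, with assumed example values in the comments:
 */
#if 0 /* illustrative sketch only */
#include <stdint.h>

static void
sketch_super_cluster(uint64_t offset, uint32_t size,
                     uint32_t super_cluster, uint64_t object_size,
                     uint64_t *out_offset, uint32_t *out_size)
{
	uint64_t base_offset = offset & ~((uint64_t)super_cluster - 1);
	uint64_t super_size = (offset + size) > (base_offset + super_cluster) ?
	    (uint64_t)super_cluster << 1 : super_cluster;

	if (base_offset + super_size > object_size)	/* clip to the object */
		super_size = object_size - base_offset;
	if (offset + size > base_offset + super_size)	/* never shrink the request */
		super_size = (offset + size) - base_offset;

	*out_offset = base_offset;	  /* e.g. offset 0x11000, size 0x1000,     */
	*out_size = (uint32_t)super_size; /* 64K cluster -> offset 0x10000, 0x10000 */
}
#endif
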
3345
3346 kern_return_t
3347 vm_map_create_upl(
3348 vm_map_t map,
3349 vm_map_address_t offset,
3350 upl_size_t *upl_size,
3351 upl_t *upl,
3352 upl_page_info_array_t page_list,
3353 unsigned int *count,
3354 int *flags)
3355 {
3356 vm_map_entry_t entry;
3357 int caller_flags;
3358 int force_data_sync;
3359 int sync_cow_data;
3360 vm_object_t local_object;
3361 vm_map_offset_t local_offset;
3362 vm_map_offset_t local_start;
3363 kern_return_t ret;
3364
3365 caller_flags = *flags;
3366
3367 if (caller_flags & ~UPL_VALID_FLAGS) {
3368 /*
3369 * For forward compatibility's sake,
3370 * reject any unknown flag.
3371 */
3372 return KERN_INVALID_VALUE;
3373 }
3374
3375 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3376 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3377
3378 if(upl == NULL)
3379 return KERN_INVALID_ARGUMENT;
3380
3381
3382 REDISCOVER_ENTRY:
3383 vm_map_lock(map);
3384 if (vm_map_lookup_entry(map, offset, &entry)) {
3385 if (entry->object.vm_object == VM_OBJECT_NULL ||
3386 !entry->object.vm_object->phys_contiguous) {
3387 if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
3388 *upl_size = MAX_UPL_TRANSFER * page_size;
3389 }
3390 }
3391 if((entry->vme_end - offset) < *upl_size) {
3392 *upl_size = entry->vme_end - offset;
3393 }
3394 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3395 if (entry->object.vm_object == VM_OBJECT_NULL) {
3396 *flags = 0;
3397 } else if (entry->object.vm_object->private) {
3398 *flags = UPL_DEV_MEMORY;
3399 if (entry->object.vm_object->phys_contiguous) {
3400 *flags |= UPL_PHYS_CONTIG;
3401 }
3402 } else {
3403 *flags = 0;
3404 }
3405 vm_map_unlock(map);
3406 return KERN_SUCCESS;
3407 }
3408 /*
3409 * Create an object if necessary.
3410 */
3411 if (entry->object.vm_object == VM_OBJECT_NULL) {
3412 entry->object.vm_object = vm_object_allocate(
3413 (vm_size_t)(entry->vme_end - entry->vme_start));
3414 entry->offset = 0;
3415 }
3416 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3417 if (!(entry->protection & VM_PROT_WRITE)) {
3418 vm_map_unlock(map);
3419 return KERN_PROTECTION_FAILURE;
3420 }
3421 if (entry->needs_copy) {
3422 vm_map_t local_map;
3423 vm_object_t object;
3424 vm_map_offset_t offset_hi;
3425 vm_map_offset_t offset_lo;
3426 vm_object_offset_t new_offset;
3427 vm_prot_t prot;
3428 boolean_t wired;
3429 vm_behavior_t behavior;
3430 vm_map_version_t version;
3431 vm_map_t real_map;
3432
3433 local_map = map;
3434 vm_map_lock_write_to_read(map);
3435 if(vm_map_lookup_locked(&local_map,
3436 offset, VM_PROT_WRITE,
3437 &version, &object,
3438 &new_offset, &prot, &wired,
3439 &behavior, &offset_lo,
3440 &offset_hi, &real_map)) {
3441 vm_map_unlock(local_map);
3442 return KERN_FAILURE;
3443 }
3444 if (real_map != map) {
3445 vm_map_unlock(real_map);
3446 }
3447 vm_object_unlock(object);
3448 vm_map_unlock(local_map);
3449
3450 goto REDISCOVER_ENTRY;
3451 }
3452 }
3453 if (entry->is_sub_map) {
3454 vm_map_t submap;
3455
3456 submap = entry->object.sub_map;
3457 local_start = entry->vme_start;
3458 local_offset = entry->offset;
3459 vm_map_reference(submap);
3460 vm_map_unlock(map);
3461
3462 ret = (vm_map_create_upl(submap,
3463 local_offset + (offset - local_start),
3464 upl_size, upl, page_list, count,
3465 flags));
3466
3467 vm_map_deallocate(submap);
3468 return ret;
3469 }
3470
3471 if (sync_cow_data) {
3472 if (entry->object.vm_object->shadow
3473 || entry->object.vm_object->copy) {
3474
3475 local_object = entry->object.vm_object;
3476 local_start = entry->vme_start;
3477 local_offset = entry->offset;
3478 vm_object_reference(local_object);
3479 vm_map_unlock(map);
3480
3481 if (entry->object.vm_object->shadow &&
3482 entry->object.vm_object->copy) {
3483 vm_object_lock_request(
3484 local_object->shadow,
3485 (vm_object_offset_t)
3486 ((offset - local_start) +
3487 local_offset) +
3488 local_object->shadow_offset,
3489 *upl_size, FALSE,
3490 MEMORY_OBJECT_DATA_SYNC,
3491 VM_PROT_NO_CHANGE);
3492 }
3493 sync_cow_data = FALSE;
3494 vm_object_deallocate(local_object);
3495 goto REDISCOVER_ENTRY;
3496 }
3497 }
3498
3499 if (force_data_sync) {
3500
3501 local_object = entry->object.vm_object;
3502 local_start = entry->vme_start;
3503 local_offset = entry->offset;
3504 vm_object_reference(local_object);
3505 vm_map_unlock(map);
3506
3507 vm_object_lock_request(
3508 local_object,
3509 (vm_object_offset_t)
3510 ((offset - local_start) + local_offset),
3511 (vm_object_size_t)*upl_size, FALSE,
3512 MEMORY_OBJECT_DATA_SYNC,
3513 VM_PROT_NO_CHANGE);
3514 force_data_sync = FALSE;
3515 vm_object_deallocate(local_object);
3516 goto REDISCOVER_ENTRY;
3517 }
3518
3519 if(!(entry->object.vm_object->private)) {
3520 if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3521 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3522 if(entry->object.vm_object->phys_contiguous) {
3523 *flags = UPL_PHYS_CONTIG;
3524 } else {
3525 *flags = 0;
3526 }
3527 } else {
3528 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3529 }
3530 local_object = entry->object.vm_object;
3531 local_offset = entry->offset;
3532 local_start = entry->vme_start;
3533 vm_object_reference(local_object);
3534 vm_map_unlock(map);
3535 if(caller_flags & UPL_SET_IO_WIRE) {
3536 ret = (vm_object_iopl_request(local_object,
3537 (vm_object_offset_t)
3538 ((offset - local_start)
3539 + local_offset),
3540 *upl_size,
3541 upl,
3542 page_list,
3543 count,
3544 caller_flags));
3545 } else {
3546 ret = (vm_object_upl_request(local_object,
3547 (vm_object_offset_t)
3548 ((offset - local_start)
3549 + local_offset),
3550 *upl_size,
3551 upl,
3552 page_list,
3553 count,
3554 caller_flags));
3555 }
3556 vm_object_deallocate(local_object);
3557 return(ret);
3558 }
3559
3560 vm_map_unlock(map);
3561 return(KERN_FAILURE);
3562
3563 }
3564
3565 /*
3566 * Internal routine to enter a UPL into a VM map.
3567 *
3568 * JMM - This should just be doable through the standard
3569 * vm_map_enter() API.
3570 */
3571 kern_return_t
3572 vm_map_enter_upl(
3573 vm_map_t map,
3574 upl_t upl,
3575 vm_map_offset_t *dst_addr)
3576 {
3577 vm_map_size_t size;
3578 vm_object_offset_t offset;
3579 vm_map_offset_t addr;
3580 vm_page_t m;
3581 kern_return_t kr;
3582
3583 if (upl == UPL_NULL)
3584 return KERN_INVALID_ARGUMENT;
3585
3586 upl_lock(upl);
3587
3588 /* check to see if already mapped */
3589 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3590 upl_unlock(upl);
3591 return KERN_FAILURE;
3592 }
3593
3594 if((!(upl->map_object->pageout)) &&
3595 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3596 (upl->map_object->phys_contiguous))) {
3597 vm_object_t object;
3598 vm_page_t alias_page;
3599 vm_object_offset_t new_offset;
3600 int pg_num;
3601 wpl_array_t lite_list;
3602
3603 if(upl->flags & UPL_INTERNAL) {
3604 lite_list = (wpl_array_t)
3605 ((((uintptr_t)upl) + sizeof(struct upl))
3606 + ((upl->size/PAGE_SIZE)
3607 * sizeof(upl_page_info_t)));
3608 } else {
3609 lite_list = (wpl_array_t)
3610 (((uintptr_t)upl) + sizeof(struct upl));
3611 }
3612 object = upl->map_object;
3613 upl->map_object = vm_object_allocate(upl->size);
3614 vm_object_lock(upl->map_object);
3615 upl->map_object->shadow = object;
3616 upl->map_object->pageout = TRUE;
3617 upl->map_object->can_persist = FALSE;
3618 upl->map_object->copy_strategy =
3619 MEMORY_OBJECT_COPY_NONE;
3620 upl->map_object->shadow_offset =
3621 upl->offset - object->paging_offset;
3622 upl->map_object->wimg_bits = object->wimg_bits;
3623 offset = upl->map_object->shadow_offset;
3624 new_offset = 0;
3625 size = upl->size;
3626
3627 vm_object_lock(object);
3628
3629 while(size) {
3630 pg_num = (new_offset)/PAGE_SIZE;
3631 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3632 vm_object_unlock(object);
3633 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3634 vm_object_lock(object);
3635 m = vm_page_lookup(object, offset);
3636 if (m == VM_PAGE_NULL) {
3637 panic("vm_upl_map: page missing\n");
3638 }
3639
3640 vm_object_paging_begin(object);
3641
3642 /*
3643 * Convert the fictitious page to a private
3644 * shadow of the real page.
3645 */
3646 assert(alias_page->fictitious);
3647 alias_page->fictitious = FALSE;
3648 alias_page->private = TRUE;
3649 alias_page->pageout = TRUE;
3650 alias_page->phys_page = m->phys_page;
3651
3652 vm_page_lock_queues();
3653 vm_page_wire(alias_page);
3654 vm_page_unlock_queues();
3655
3656 /*
3657 * ENCRYPTED SWAP:
3658 * The virtual page ("m") has to be wired in some way
3659 * here or its physical page ("m->phys_page") could
3660 * be recycled at any time.
3661 * Assuming this is enforced by the caller, we can't
3662 * get an encrypted page here. Since the encryption
3663 * key depends on the VM page's "pager" object and
3664 * the "paging_offset", we couldn't handle 2 pageable
3665 * VM pages (with different pagers and paging_offsets)
3666 * sharing the same physical page: we could end up
3667 * encrypting with one key (via one VM page) and
3668 * decrypting with another key (via the alias VM page).
3669 */
3670 ASSERT_PAGE_DECRYPTED(m);
3671
3672 vm_page_insert(alias_page,
3673 upl->map_object, new_offset);
3674 assert(!alias_page->wanted);
3675 alias_page->busy = FALSE;
3676 alias_page->absent = FALSE;
3677 }
3678
3679 size -= PAGE_SIZE;
3680 offset += PAGE_SIZE_64;
3681 new_offset += PAGE_SIZE_64;
3682 }
3683 vm_object_unlock(object);
3684 vm_object_unlock(upl->map_object);
3685 }
3686 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3687 offset = upl->offset - upl->map_object->paging_offset;
3688 else
3689 offset = 0;
3690
3691 size = upl->size;
3692
3693 vm_object_lock(upl->map_object);
3694 upl->map_object->ref_count++;
3695 vm_object_res_reference(upl->map_object);
3696 vm_object_unlock(upl->map_object);
3697
3698 *dst_addr = 0;
3699
3700
3701 /* NEED A UPL_MAP ALIAS */
3702 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3703 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3704 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3705
3706 if (kr != KERN_SUCCESS) {
3707 upl_unlock(upl);
3708 return(kr);
3709 }
3710
3711 vm_object_lock(upl->map_object);
3712
3713 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3714 m = vm_page_lookup(upl->map_object, offset);
3715 if(m) {
3716 unsigned int cache_attr;
3717 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3718
3719 PMAP_ENTER(map->pmap, addr,
3720 m, VM_PROT_ALL,
3721 cache_attr, TRUE);
3722 }
3723 offset+=PAGE_SIZE_64;
3724 }
3725 vm_object_unlock(upl->map_object);
3726
3727 upl->ref_count++; /* hold a reference for the mapping */
3728 upl->flags |= UPL_PAGE_LIST_MAPPED;
3729 upl->kaddr = *dst_addr;
3730 upl_unlock(upl);
3731 return KERN_SUCCESS;
3732 }
3733
3734 /*
3735 * Internal routine to remove a UPL mapping from a VM map.
3736 *
3737 * XXX - This should just be doable through a standard
3738 * vm_map_remove() operation. Otherwise, implicit clean-up
3739 * of the target map won't be able to correctly remove
3740 * these (and release the reference on the UPL). Having
3741 * to do this means we can't map these into user-space
3742 * maps yet.
3743 */
3744 kern_return_t
3745 vm_map_remove_upl(
3746 vm_map_t map,
3747 upl_t upl)
3748 {
3749 vm_address_t addr;
3750 upl_size_t size;
3751
3752 if (upl == UPL_NULL)
3753 return KERN_INVALID_ARGUMENT;
3754
3755 upl_lock(upl);
3756 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
3757 addr = upl->kaddr;
3758 size = upl->size;
3759 assert(upl->ref_count > 1);
3760 upl->ref_count--; /* removing mapping ref */
3761 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3762 upl->kaddr = (vm_offset_t) 0;
3763 upl_unlock(upl);
3764
3765 vm_map_remove( map,
3766 vm_map_trunc_page(addr),
3767 vm_map_round_page(addr + size),
3768 VM_MAP_NO_FLAGS);
3769 return KERN_SUCCESS;
3770 }
3771 upl_unlock(upl);
3772 return KERN_FAILURE;
3773 }
3774
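/*
 * vm_map_enter_upl() and vm_map_remove_upl() are meant to be used as a
 * pair: the first takes an extra reference on the UPL and records the
 * mapping address in upl->kaddr, the second drops both again before the
 * range is removed from the map.  A hypothetical caller that needs a
 * temporary kernel window onto a UPL's pages might look like the sketch
 * below; the name sketch_with_upl_mapping and the work callback are
 * assumptions, and error handling is trimmed.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
sketch_with_upl_mapping(vm_map_t map, upl_t upl,
                        void (*work)(vm_map_offset_t addr, upl_size_t size))
{
	vm_map_offset_t	addr;
	kern_return_t	kr;

	kr = vm_map_enter_upl(map, upl, &addr);		/* map the pages */
	if (kr != KERN_SUCCESS)
		return kr;

	work(addr, upl->size);				/* touch them through the window */

	return vm_map_remove_upl(map, upl);		/* unmap and drop the mapping ref */
}
#endif
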
3775 kern_return_t
3776 upl_commit_range(
3777 upl_t upl,
3778 upl_offset_t offset,
3779 upl_size_t size,
3780 int flags,
3781 upl_page_info_t *page_list,
3782 mach_msg_type_number_t count,
3783 boolean_t *empty)
3784 {
3785 upl_size_t xfer_size = size;
3786 vm_object_t shadow_object;
3787 vm_object_t object = upl->map_object;
3788 vm_object_offset_t target_offset;
3789 int entry;
3790 wpl_array_t lite_list;
3791 int occupied;
3792 int delayed_unlock = 0;
3793 int clear_refmod = 0;
3794 boolean_t shadow_internal;
3795
3796 *empty = FALSE;
3797
3798 if (upl == UPL_NULL)
3799 return KERN_INVALID_ARGUMENT;
3800
3801
3802 if (count == 0)
3803 page_list = NULL;
3804
3805 if (object->pageout) {
3806 shadow_object = object->shadow;
3807 } else {
3808 shadow_object = object;
3809 }
3810
3811 upl_lock(upl);
3812
3813 if (upl->flags & UPL_ACCESS_BLOCKED) {
3814 /*
3815 * We used this UPL to block access to the pages by marking
3816 * them "busy". Now we need to clear the "busy" bit to allow
3817 * access to these pages again.
3818 */
3819 flags |= UPL_COMMIT_ALLOW_ACCESS;
3820 }
3821
3822 if (upl->flags & UPL_CLEAR_DIRTY)
3823 flags |= UPL_COMMIT_CLEAR_DIRTY;
3824
3825 if (upl->flags & UPL_DEVICE_MEMORY) {
3826 xfer_size = 0;
3827 } else if ((offset + size) > upl->size) {
3828 upl_unlock(upl);
3829 return KERN_FAILURE;
3830 }
3831
3832 if (upl->flags & UPL_INTERNAL) {
3833 lite_list = (wpl_array_t)
3834 ((((uintptr_t)upl) + sizeof(struct upl))
3835 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3836 } else {
3837 lite_list = (wpl_array_t)
3838 (((uintptr_t)upl) + sizeof(struct upl));
3839 }
3840 if (object != shadow_object)
3841 vm_object_lock(object);
3842 vm_object_lock(shadow_object);
3843
3844 shadow_internal = shadow_object->internal;
3845
3846 entry = offset/PAGE_SIZE;
3847 target_offset = (vm_object_offset_t)offset;
3848
3849 while (xfer_size) {
3850 vm_page_t t,m;
3851 upl_page_info_t *p;
3852
3853 m = VM_PAGE_NULL;
3854
3855 if (upl->flags & UPL_LITE) {
3856 int pg_num;
3857
3858 pg_num = target_offset/PAGE_SIZE;
3859
3860 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3861 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3862 m = vm_page_lookup(shadow_object,
3863 target_offset + (upl->offset -
3864 shadow_object->paging_offset));
3865 }
3866 }
3867 if (object->pageout) {
3868 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3869 t->pageout = FALSE;
3870
3871 if (delayed_unlock) {
3872 delayed_unlock = 0;
3873 vm_page_unlock_queues();
3874 }
3875 VM_PAGE_FREE(t);
3876
3877 if (m == NULL) {
3878 m = vm_page_lookup(
3879 shadow_object,
3880 target_offset +
3881 object->shadow_offset);
3882 }
3883 if (m != VM_PAGE_NULL)
3884 vm_object_paging_end(m->object);
3885 }
3886 }
3887 if (m != VM_PAGE_NULL) {
3888
3889 clear_refmod = 0;
3890
3891 if (upl->flags & UPL_IO_WIRE) {
3892
3893 if (delayed_unlock == 0)
3894 vm_page_lock_queues();
3895
3896 vm_page_unwire(m);
3897
3898 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3899 delayed_unlock = 0;
3900 vm_page_unlock_queues();
3901 }
3902 if (page_list) {
3903 page_list[entry].phys_addr = 0;
3904 }
3905 if (flags & UPL_COMMIT_SET_DIRTY) {
3906 m->dirty = TRUE;
3907 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3908 m->dirty = FALSE;
3909 clear_refmod |= VM_MEM_MODIFIED;
3910 }
3911 if (flags & UPL_COMMIT_INACTIVATE) {
3912 m->reference = FALSE;
3913 clear_refmod |= VM_MEM_REFERENCED;
3914 vm_page_deactivate(m);
3915 }
3916 if (clear_refmod)
3917 pmap_clear_refmod(m->phys_page, clear_refmod);
3918
3919 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
3920 /*
3921 * We blocked access to the pages in this UPL.
3922 * Clear the "busy" bit and wake up any waiter
3923 * for this page.
3924 */
3925 PAGE_WAKEUP_DONE(m);
3926 }
3927
3928 target_offset += PAGE_SIZE_64;
3929 xfer_size -= PAGE_SIZE;
3930 entry++;
3931 continue;
3932 }
3933 if (delayed_unlock == 0)
3934 vm_page_lock_queues();
3935 /*
3936 * make sure to clear the hardware
3937 * modify or reference bits before
3938 * releasing the BUSY bit on this page
3939 * otherwise we risk losing a legitimate
3940 * change of state
3941 */
3942 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3943 m->dirty = FALSE;
3944 clear_refmod |= VM_MEM_MODIFIED;
3945 }
3946 if (flags & UPL_COMMIT_INACTIVATE)
3947 clear_refmod |= VM_MEM_REFERENCED;
3948
3949 if (clear_refmod)
3950 pmap_clear_refmod(m->phys_page, clear_refmod);
3951
3952 if (page_list) {
3953 p = &(page_list[entry]);
3954 if(p->phys_addr && p->pageout && !m->pageout) {
3955 m->busy = TRUE;
3956 m->pageout = TRUE;
3957 vm_page_wire(m);
3958 } else if (page_list[entry].phys_addr &&
3959 !p->pageout && m->pageout &&
3960 !m->dump_cleaning) {
3961 m->pageout = FALSE;
3962 m->absent = FALSE;
3963 m->overwriting = FALSE;
3964 vm_page_unwire(m);
3965 PAGE_WAKEUP_DONE(m);
3966 }
3967 page_list[entry].phys_addr = 0;
3968 }
3969 m->dump_cleaning = FALSE;
3970 if(m->laundry) {
3971 vm_pageout_throttle_up(m);
3972 }
3973 if(m->pageout) {
3974 m->cleaning = FALSE;
3975 m->pageout = FALSE;
3976 #if MACH_CLUSTER_STATS
3977 if (m->wanted) vm_pageout_target_collisions++;
3978 #endif
3979 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
3980 m->dirty = TRUE;
3981 else
3982 m->dirty = FALSE;
3983
3984 if(m->dirty) {
3985 vm_page_unwire(m);/* reactivates */
3986
3987 if (upl->flags & UPL_PAGEOUT) {
3988 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
3989 VM_STAT(reactivations++);
3990 }
3991 PAGE_WAKEUP_DONE(m);
3992 } else {
3993 vm_page_free(m);/* clears busy, etc. */
3994
3995 if (upl->flags & UPL_PAGEOUT) {
3996 CLUSTER_STAT(vm_pageout_target_page_freed++;)
3997
3998 if (page_list[entry].dirty)
3999 VM_STAT(pageouts++);
4000 }
4001 }
4002 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4003 delayed_unlock = 0;
4004 vm_page_unlock_queues();
4005 }
4006 target_offset += PAGE_SIZE_64;
4007 xfer_size -= PAGE_SIZE;
4008 entry++;
4009 continue;
4010 }
4011 #if MACH_CLUSTER_STATS
4012 m->dirty = pmap_is_modified(m->phys_page);
4013
4014 if (m->dirty) vm_pageout_cluster_dirtied++;
4015 else vm_pageout_cluster_cleaned++;
4016 if (m->wanted) vm_pageout_cluster_collisions++;
4017 #else
4018 m->dirty = FALSE;
4019 #endif
4020
4021 if((m->busy) && (m->cleaning)) {
4022 /* the request_page_list case */
4023 if(m->absent) {
4024 m->absent = FALSE;
4025 if(shadow_object->absent_count == 1)
4026 vm_object_absent_release(shadow_object);
4027 else
4028 shadow_object->absent_count--;
4029 }
4030 m->overwriting = FALSE;
4031 m->busy = FALSE;
4032 m->dirty = FALSE;
4033 } else if (m->overwriting) {
4034 /* alternate request page list, write to
4035 * page_list case. Occurs when the original
4036 * page was wired at the time of the list
4037 * request */
4038 assert(m->wire_count != 0);
4039 vm_page_unwire(m);/* reactivates */
4040 m->overwriting = FALSE;
4041 }
4042 m->cleaning = FALSE;
4043
4044 /* It is a part of the semantic of COPYOUT_FROM */
4045 /* UPLs that a commit implies cache sync */
4046 /* between the vm page and the backing store */
4047 /* this can be used to strip the precious bit */
4048 /* as well as clean */
4049 if (upl->flags & UPL_PAGE_SYNC_DONE)
4050 m->precious = FALSE;
4051
4052 if (flags & UPL_COMMIT_SET_DIRTY)
4053 m->dirty = TRUE;
4054
4055 if (flags & UPL_COMMIT_INACTIVATE) {
4056 m->reference = FALSE;
4057 vm_page_deactivate(m);
4058 } else if (!m->active && !m->inactive) {
4059 if (m->reference)
4060 vm_page_activate(m);
4061 else
4062 vm_page_deactivate(m);
4063 }
4064
4065 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4066 /*
4067 * We blocked access to the pages in this UPL.
4068 * Clear the "busy" bit on this page before we
4069 * wake up any waiter.
4070 */
4071 m->busy = FALSE;
4072 }
4073
4074 /*
4075 * Wake up any thread waiting for this page's "cleaning" state to clear.
4076 */
4077 PAGE_WAKEUP(m);
4078
4079 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4080 delayed_unlock = 0;
4081 vm_page_unlock_queues();
4082 }
4083 }
4084 target_offset += PAGE_SIZE_64;
4085 xfer_size -= PAGE_SIZE;
4086 entry++;
4087 }
4088 if (delayed_unlock)
4089 vm_page_unlock_queues();
4090
4091 occupied = 1;
4092
4093 if (upl->flags & UPL_DEVICE_MEMORY) {
4094 occupied = 0;
4095 } else if (upl->flags & UPL_LITE) {
4096 int pg_num;
4097 int i;
4098 pg_num = upl->size/PAGE_SIZE;
4099 pg_num = (pg_num + 31) >> 5;
4100 occupied = 0;
4101 for(i= 0; i<pg_num; i++) {
4102 if(lite_list[i] != 0) {
4103 occupied = 1;
4104 break;
4105 }
4106 }
4107 } else {
4108 if(queue_empty(&upl->map_object->memq)) {
4109 occupied = 0;
4110 }
4111 }
4112
4113 if(occupied == 0) {
4114 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4115 *empty = TRUE;
4116 }
4117 if(object == shadow_object)
4118 vm_object_paging_end(shadow_object);
4119 }
4120 vm_object_unlock(shadow_object);
4121 if (object != shadow_object)
4122 vm_object_unlock(object);
4123 upl_unlock(upl);
4124
4125 return KERN_SUCCESS;
4126 }
4127
4128 kern_return_t
4129 upl_abort_range(
4130 upl_t upl,
4131 upl_offset_t offset,
4132 upl_size_t size,
4133 int error,
4134 boolean_t *empty)
4135 {
4136 upl_size_t xfer_size = size;
4137 vm_object_t shadow_object;
4138 vm_object_t object = upl->map_object;
4139 vm_object_offset_t target_offset;
4140 int entry;
4141 wpl_array_t lite_list;
4142 int occupied;
4143 boolean_t shadow_internal;
4144
4145 *empty = FALSE;
4146
4147 if (upl == UPL_NULL)
4148 return KERN_INVALID_ARGUMENT;
4149
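/*
 * For an I/O-wired UPL an abort reduces to a commit: there is nothing
 * to push back to the pager, the pages just need to be unwired, so hand
 * the range to upl_commit_range() with no commit flags.
 */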
4150 if (upl->flags & UPL_IO_WIRE) {
4151 return upl_commit_range(upl,
4152 offset, size, 0,
4153 NULL, 0, empty);
4154 }
4155
4156 if(object->pageout) {
4157 shadow_object = object->shadow;
4158 } else {
4159 shadow_object = object;
4160 }
4161
4162 upl_lock(upl);
4163 if(upl->flags & UPL_DEVICE_MEMORY) {
4164 xfer_size = 0;
4165 } else if ((offset + size) > upl->size) {
4166 upl_unlock(upl);
4167 return KERN_FAILURE;
4168 }
4169 if (object != shadow_object)
4170 vm_object_lock(object);
4171 vm_object_lock(shadow_object);
4172
4173 shadow_internal = shadow_object->internal;
4174
4175 if(upl->flags & UPL_INTERNAL) {
4176 lite_list = (wpl_array_t)
4177 ((((uintptr_t)upl) + sizeof(struct upl))
4178 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4179 } else {
4180 lite_list = (wpl_array_t)
4181 (((uintptr_t)upl) + sizeof(struct upl));
4182 }
4183
4184 entry = offset/PAGE_SIZE;
4185 target_offset = (vm_object_offset_t)offset;
4186 while(xfer_size) {
4187 vm_page_t t,m;
4188
4189 m = VM_PAGE_NULL;
4190 if(upl->flags & UPL_LITE) {
4191 int pg_num;
4192 pg_num = target_offset/PAGE_SIZE;
4193 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4194 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4195 m = vm_page_lookup(shadow_object,
4196 target_offset + (upl->offset -
4197 shadow_object->paging_offset));
4198 }
4199 }
4200 if(object->pageout) {
4201 if ((t = vm_page_lookup(object, target_offset))
4202 != NULL) {
4203 t->pageout = FALSE;
4204 VM_PAGE_FREE(t);
4205 if(m == NULL) {
4206 m = vm_page_lookup(
4207 shadow_object,
4208 target_offset +
4209 object->shadow_offset);
4210 }
4211 if(m != VM_PAGE_NULL)
4212 vm_object_paging_end(m->object);
4213 }
4214 }
4215 if(m != VM_PAGE_NULL) {
4216 vm_page_lock_queues();
4217 if(m->absent) {
4218 boolean_t must_free = TRUE;
4219
4220 /* COPYOUT = FALSE case */
4221 /* check for error conditions which must */
4222 /* be passed back to the page's customer */
4223 if(error & UPL_ABORT_RESTART) {
4224 m->restart = TRUE;
4225 m->absent = FALSE;
4226 vm_object_absent_release(m->object);
4227 m->page_error = KERN_MEMORY_ERROR;
4228 m->error = TRUE;
4229 must_free = FALSE;
4230 } else if(error & UPL_ABORT_UNAVAILABLE) {
4231 m->restart = FALSE;
4232 m->unusual = TRUE;
4233 must_free = FALSE;
4234 } else if(error & UPL_ABORT_ERROR) {
4235 m->restart = FALSE;
4236 m->absent = FALSE;
4237 vm_object_absent_release(m->object);
4238 m->page_error = KERN_MEMORY_ERROR;
4239 m->error = TRUE;
4240 must_free = FALSE;
4241 }
4242
4243 /*
4244 * ENCRYPTED SWAP:
4245 * If the page was already encrypted,
4246 * we don't really need to decrypt it
4247 * now. It will get decrypted later,
4248 * on demand, as soon as someone needs
4249 * to access its contents.
4250 */
4251
4252 m->cleaning = FALSE;
4253 m->overwriting = FALSE;
4254 PAGE_WAKEUP_DONE(m);
4255
4256 if (must_free == TRUE) {
4257 vm_page_free(m);
4258 } else {
4259 vm_page_activate(m);
4260 }
4261 vm_page_unlock_queues();
4262
4263 target_offset += PAGE_SIZE_64;
4264 xfer_size -= PAGE_SIZE;
4265 entry++;
4266 continue;
4267 }
4268 /*
4269 * Handle the trusted pager throttle.
4270 */
4271 if (m->laundry) {
4272 vm_pageout_throttle_up(m);
4273 }
4274 if(m->pageout) {
4275 assert(m->busy);
4276 assert(m->wire_count == 1);
4277 m->pageout = FALSE;
4278 vm_page_unwire(m);
4279 }
4280 m->dump_cleaning = FALSE;
4281 m->cleaning = FALSE;
4282 m->overwriting = FALSE;
4283 #if MACH_PAGEMAP
4284 vm_external_state_clr(
4285 m->object->existence_map, m->offset);
4286 #endif /* MACH_PAGEMAP */
4287 if(error & UPL_ABORT_DUMP_PAGES) {
4288 vm_page_free(m);
4289 pmap_disconnect(m->phys_page);
4290 } else {
4291 PAGE_WAKEUP_DONE(m);
4292 }
4293 vm_page_unlock_queues();
4294 }
4295 target_offset += PAGE_SIZE_64;
4296 xfer_size -= PAGE_SIZE;
4297 entry++;
4298 }
4299 occupied = 1;
4300 if (upl->flags & UPL_DEVICE_MEMORY) {
4301 occupied = 0;
4302 } else if (upl->flags & UPL_LITE) {
4303 int pg_num;
4304 int i;
4305 pg_num = upl->size/PAGE_SIZE;
4306 pg_num = (pg_num + 31) >> 5;
4307 occupied = 0;
4308 for(i= 0; i<pg_num; i++) {
4309 if(lite_list[i] != 0) {
4310 occupied = 1;
4311 break;
4312 }
4313 }
4314 } else {
4315 if(queue_empty(&upl->map_object->memq)) {
4316 occupied = 0;
4317 }
4318 }
4319
4320 if(occupied == 0) {
4321 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4322 *empty = TRUE;
4323 }
4324 if(object == shadow_object)
4325 vm_object_paging_end(shadow_object);
4326 }
4327 vm_object_unlock(shadow_object);
4328 if (object != shadow_object)
4329 vm_object_unlock(object);
4330
4331 upl_unlock(upl);
4332
4333 return KERN_SUCCESS;
4334 }
4335
4336 kern_return_t
4337 upl_abort(
4338 upl_t upl,
4339 int error)
4340 {
4341 vm_object_t object = NULL;
4342 vm_object_t shadow_object = NULL;
4343 vm_object_offset_t offset;
4344 vm_object_offset_t shadow_offset;
4345 vm_object_offset_t target_offset;
4346 upl_size_t i;
4347 wpl_array_t lite_list;
4348 vm_page_t t,m;
4349 int occupied;
4350 boolean_t shadow_internal;
4351
4352 if (upl == UPL_NULL)
4353 return KERN_INVALID_ARGUMENT;
4354
4355 if (upl->flags & UPL_IO_WIRE) {
4356 boolean_t empty;
4357 return upl_commit_range(upl,
4358 0, upl->size, 0,
4359 NULL, 0, &empty);
4360 }
4361
4362 upl_lock(upl);
4363 if(upl->flags & UPL_DEVICE_MEMORY) {
4364 upl_unlock(upl);
4365 return KERN_SUCCESS;
4366 }
4367
4368 object = upl->map_object;
4369
4370 if (object == NULL) {
4371 panic("upl_abort: upl object is not backed by an object");
4372 upl_unlock(upl);
4373 return KERN_INVALID_ARGUMENT;
4374 }
4375
4376 if(object->pageout) {
4377 shadow_object = object->shadow;
4378 shadow_offset = object->shadow_offset;
4379 } else {
4380 shadow_object = object;
4381 shadow_offset = upl->offset - object->paging_offset;
4382 }
4383
4384 if(upl->flags & UPL_INTERNAL) {
4385 lite_list = (wpl_array_t)
4386 ((((uintptr_t)upl) + sizeof(struct upl))
4387 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4388 } else {
4389 lite_list = (wpl_array_t)
4390 (((uintptr_t)upl) + sizeof(struct upl));
4391 }
4392 offset = 0;
4393
4394 if (object != shadow_object)
4395 vm_object_lock(object);
4396 vm_object_lock(shadow_object);
4397
4398 shadow_internal = shadow_object->internal;
4399
4400 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
4401 m = VM_PAGE_NULL;
4402 target_offset = offset + shadow_offset;
4403 if(upl->flags & UPL_LITE) {
4404 int pg_num;
4405 pg_num = offset/PAGE_SIZE;
4406 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4407 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4408 m = vm_page_lookup(
4409 shadow_object, target_offset);
4410 }
4411 }
4412 if(object->pageout) {
4413 if ((t = vm_page_lookup(object, offset)) != NULL) {
4414 t->pageout = FALSE;
4415 VM_PAGE_FREE(t);
4416 if(m == NULL) {
4417 m = vm_page_lookup(
4418 shadow_object, target_offset);
4419 }
4420 if(m != VM_PAGE_NULL)
4421 vm_object_paging_end(m->object);
4422 }
4423 }
4424 if(m != VM_PAGE_NULL) {
4425 vm_page_lock_queues();
4426 if(m->absent) {
4427 boolean_t must_free = TRUE;
4428
4429 /* COPYOUT = FALSE case */
4430 /* check for error conditions which must */
4431 /* be passed back to the page's customer */
4432 if(error & UPL_ABORT_RESTART) {
4433 m->restart = TRUE;
4434 m->absent = FALSE;
4435 vm_object_absent_release(m->object);
4436 m->page_error = KERN_MEMORY_ERROR;
4437 m->error = TRUE;
4438 must_free = FALSE;
4439 } else if(error & UPL_ABORT_UNAVAILABLE) {
4440 m->restart = FALSE;
4441 m->unusual = TRUE;
4442 must_free = FALSE;
4443 } else if(error & UPL_ABORT_ERROR) {
4444 m->restart = FALSE;
4445 m->absent = FALSE;
4446 vm_object_absent_release(m->object);
4447 m->page_error = KERN_MEMORY_ERROR;
4448 m->error = TRUE;
4449 must_free = FALSE;
4450 }
4451
4452 /*
4453 * ENCRYPTED SWAP:
4454 * If the page was already encrypted,
4455 * we don't really need to decrypt it
4456 * now. It will get decrypted later,
4457 * on demand, as soon as someone needs
4458 * to access its contents.
4459 */
4460
4461 m->cleaning = FALSE;
4462 m->overwriting = FALSE;
4463 PAGE_WAKEUP_DONE(m);
4464
4465 if (must_free == TRUE) {
4466 vm_page_free(m);
4467 } else {
4468 vm_page_activate(m);
4469 }
4470 vm_page_unlock_queues();
4471 continue;
4472 }
4473 /*
4474 * Handle the trusted pager throttle.
4475 */
4476 if (m->laundry) {
4477 vm_pageout_throttle_up(m);
4478 }
4479 if(m->pageout) {
4480 assert(m->busy);
4481 assert(m->wire_count == 1);
4482 m->pageout = FALSE;
4483 vm_page_unwire(m);
4484 }
4485 m->dump_cleaning = FALSE;
4486 m->cleaning = FALSE;
4487 m->overwriting = FALSE;
4488 #if MACH_PAGEMAP
4489 vm_external_state_clr(
4490 m->object->existence_map, m->offset);
4491 #endif /* MACH_PAGEMAP */
4492 if(error & UPL_ABORT_DUMP_PAGES) {
4493 vm_page_free(m);
4494 pmap_disconnect(m->phys_page);
4495 } else {
4496 PAGE_WAKEUP_DONE(m);
4497 }
4498 vm_page_unlock_queues();
4499 }
4500 }
4501 occupied = 1;
4502 if (upl->flags & UPL_DEVICE_MEMORY) {
4503 occupied = 0;
4504 } else if (upl->flags & UPL_LITE) {
4505 int pg_num;
4506 int j;
4507 pg_num = upl->size/PAGE_SIZE;
4508 pg_num = (pg_num + 31) >> 5;
4509 occupied = 0;
4510 for(j= 0; j<pg_num; j++) {
4511 if(lite_list[j] != 0) {
4512 occupied = 1;
4513 break;
4514 }
4515 }
4516 } else {
4517 if(queue_empty(&upl->map_object->memq)) {
4518 occupied = 0;
4519 }
4520 }
4521
4522 if(occupied == 0) {
4523 if(object == shadow_object)
4524 vm_object_paging_end(shadow_object);
4525 }
4526 vm_object_unlock(shadow_object);
4527 if (object != shadow_object)
4528 vm_object_unlock(object);
4529
4530 upl_unlock(upl);
4531 return KERN_SUCCESS;
4532 }
4533
4534 /* an option on commit should be wire */
4535 kern_return_t
4536 upl_commit(
4537 upl_t upl,
4538 upl_page_info_t *page_list,
4539 mach_msg_type_number_t count)
4540 {
4541 if (upl == UPL_NULL)
4542 return KERN_INVALID_ARGUMENT;
4543
4544 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
4545 boolean_t empty;
4546 return upl_commit_range(upl, 0, upl->size, 0,
4547 page_list, count, &empty);
4548 }
4549
4550 if (count == 0)
4551 page_list = NULL;
4552
4553 upl_lock(upl);
4554 if (upl->flags & UPL_DEVICE_MEMORY)
4555 page_list = NULL;
4556
4557 if (upl->flags & UPL_ENCRYPTED) {
4558 /*
4559 * ENCRYPTED SWAP:
4560 * This UPL was encrypted, but we don't need
4561 * to decrypt here. We'll decrypt each page
4562 * later, on demand, as soon as someone needs
4563 * to access the page's contents.
4564 */
4565 }
4566
4567 if ((upl->flags & UPL_CLEAR_DIRTY) ||
4568 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
4569 vm_object_t shadow_object = upl->map_object->shadow;
4570 vm_object_t object = upl->map_object;
4571 vm_object_offset_t target_offset;
4572 upl_size_t xfer_end;
4573 int entry;
4574
4575 vm_page_t t, m;
4576 upl_page_info_t *p;
4577
4578 if (object != shadow_object)
4579 vm_object_lock(object);
4580 vm_object_lock(shadow_object);
4581
4582 entry = 0;
4583 target_offset = object->shadow_offset;
4584 xfer_end = upl->size + object->shadow_offset;
4585
4586 while(target_offset < xfer_end) {
4587
4588 if ((t = vm_page_lookup(object,
4589 target_offset - object->shadow_offset))
4590 == NULL) {
4591 target_offset += PAGE_SIZE_64;
4592 entry++;
4593 continue;
4594 }
4595
4596 m = vm_page_lookup(shadow_object, target_offset);
4597 if(m != VM_PAGE_NULL) {
4598 /*
4599 * ENCRYPTED SWAP:
4600 * If this page was encrypted, we
4601 * don't need to decrypt it here.
4602 * We'll decrypt it later, on demand,
4603 * as soon as someone needs to access
4604 * its contents.
4605 */
4606
4607 if (upl->flags & UPL_CLEAR_DIRTY) {
4608 pmap_clear_modify(m->phys_page);
4609 m->dirty = FALSE;
4610 }
4611 /* It is a part of the semantic of */
4612 /* COPYOUT_FROM UPLs that a commit */
4613 /* implies cache sync between the */
4614 /* vm page and the backing store */
4615 /* this can be used to strip the */
4616 /* precious bit as well as clean */
4617 if (upl->flags & UPL_PAGE_SYNC_DONE)
4618 m->precious = FALSE;
4619
4620 if(page_list) {
4621 p = &(page_list[entry]);
4622 if(page_list[entry].phys_addr &&
4623 p->pageout && !m->pageout) {
4624 vm_page_lock_queues();
4625 m->busy = TRUE;
4626 m->pageout = TRUE;
4627 vm_page_wire(m);
4628 vm_page_unlock_queues();
4629 } else if (page_list[entry].phys_addr &&
4630 !p->pageout && m->pageout &&
4631 !m->dump_cleaning) {
4632 vm_page_lock_queues();
4633 m->pageout = FALSE;
4634 m->absent = FALSE;
4635 m->overwriting = FALSE;
4636 vm_page_unwire(m);
4637 PAGE_WAKEUP_DONE(m);
4638 vm_page_unlock_queues();
4639 }
4640 page_list[entry].phys_addr = 0;
4641 }
4642 }
4643 target_offset += PAGE_SIZE_64;
4644 entry++;
4645 }
4646 vm_object_unlock(shadow_object);
4647 if (object != shadow_object)
4648 vm_object_unlock(object);
4649
4650 }
4651 if (upl->flags & UPL_DEVICE_MEMORY) {
4652 vm_object_lock(upl->map_object->shadow);
4653 if(upl->map_object == upl->map_object->shadow)
4654 vm_object_paging_end(upl->map_object->shadow);
4655 vm_object_unlock(upl->map_object->shadow);
4656 }
4657 upl_unlock(upl);
4658 return KERN_SUCCESS;
4659 }
4660
4661
4662
4663 kern_return_t
4664 vm_object_iopl_request(
4665 vm_object_t object,
4666 vm_object_offset_t offset,
4667 upl_size_t size,
4668 upl_t *upl_ptr,
4669 upl_page_info_array_t user_page_list,
4670 unsigned int *page_list_count,
4671 int cntrl_flags)
4672 {
4673 vm_page_t dst_page;
4674 vm_object_offset_t dst_offset = offset;
4675 upl_size_t xfer_size = size;
4676 upl_t upl = NULL;
4677 unsigned int entry;
4678 wpl_array_t lite_list = NULL;
4679 int page_field_size;
4680 int delayed_unlock = 0;
4681 int no_zero_fill = FALSE;
4682 vm_page_t alias_page = NULL;
4683 kern_return_t ret;
4684 vm_prot_t prot;
4685
4686
4687 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4688 /*
4689 * For forward compatibility's sake,
4690 * reject any unknown flag.
4691 */
4692 return KERN_INVALID_VALUE;
4693 }
4694
4695 if (cntrl_flags & UPL_ENCRYPT) {
4696 /*
4697 * ENCRYPTED SWAP:
4698 * The paging path doesn't use this interface,
4699 * so we don't support the UPL_ENCRYPT flag
4700 * here. We won't encrypt the pages.
4701 */
4702 assert(! (cntrl_flags & UPL_ENCRYPT));
4703 }
4704
4705 if (cntrl_flags & UPL_NOZEROFILL)
4706 no_zero_fill = TRUE;
4707
4708 if (cntrl_flags & UPL_COPYOUT_FROM)
4709 prot = VM_PROT_READ;
4710 else
4711 prot = VM_PROT_READ | VM_PROT_WRITE;
4712
4713 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4714 size = MAX_UPL_TRANSFER * page_size;
4715 }
4716
4717 if(cntrl_flags & UPL_SET_INTERNAL)
4718 if(page_list_count != NULL)
4719 *page_list_count = MAX_UPL_TRANSFER;
4720 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4721 ((page_list_count != NULL) && (*page_list_count != 0)
4722 && *page_list_count < (size/page_size)))
4723 return KERN_INVALID_ARGUMENT;
4724
4725 if((!object->internal) && (object->paging_offset != 0))
4726 panic("vm_object_iopl_request: vnode object with non-zero paging offset\n");
4727
4728 if(object->phys_contiguous) {
4729 /* No paging operations are possible against this memory */
4730 /* and so no need for map object, ever */
4731 cntrl_flags |= UPL_SET_LITE;
4732 }
4733
4734 if(upl_ptr) {
4735 if(cntrl_flags & UPL_SET_INTERNAL) {
4736 if(cntrl_flags & UPL_SET_LITE) {
4737 upl = upl_create(
4738 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4739 size);
4740 user_page_list = (upl_page_info_t *)
4741 (((uintptr_t)upl) + sizeof(struct upl));
4742 lite_list = (wpl_array_t)
4743 (((uintptr_t)user_page_list) +
4744 ((size/PAGE_SIZE) *
4745 sizeof(upl_page_info_t)));
4746 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4747 page_field_size =
4748 (page_field_size + 3) & 0xFFFFFFFC;
4749 bzero((char *)lite_list, page_field_size);
4750 upl->flags =
4751 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4752 } else {
4753 upl = upl_create(UPL_CREATE_INTERNAL, size);
4754 user_page_list = (upl_page_info_t *)
4755 (((uintptr_t)upl)
4756 + sizeof(struct upl));
4757 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4758 }
4759 } else {
4760 if(cntrl_flags & UPL_SET_LITE) {
4761 upl = upl_create(UPL_CREATE_LITE, size);
4762 lite_list = (wpl_array_t)
4763 (((uintptr_t)upl) + sizeof(struct upl));
4764 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4765 page_field_size =
4766 (page_field_size + 3) & 0xFFFFFFFC;
4767 bzero((char *)lite_list, page_field_size);
4768 upl->flags = UPL_LITE | UPL_IO_WIRE;
4769 } else {
4770 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4771 upl->flags = UPL_IO_WIRE;
4772 }
4773 }
4774
4775 if(object->phys_contiguous) {
4776 upl->map_object = object;
4777 /* don't need any shadow mappings for this one */
4778 /* since it is already I/O memory */
4779 upl->flags |= UPL_DEVICE_MEMORY;
4780
4781 vm_object_lock(object);
4782 vm_object_paging_begin(object);
4783 vm_object_unlock(object);
4784
4785 /* paging in progress also protects the paging_offset */
4786 upl->offset = offset + object->paging_offset;
4787 upl->size = size;
4788 *upl_ptr = upl;
4789 if(user_page_list) {
4790 user_page_list[0].phys_addr =
4791 (offset + object->shadow_offset)>>PAGE_SHIFT;
4792 user_page_list[0].device = TRUE;
4793 }
4794
4795 if(page_list_count != NULL) {
4796 if (upl->flags & UPL_INTERNAL) {
4797 *page_list_count = 0;
4798 } else {
4799 *page_list_count = 1;
4800 }
4801 }
4802 return KERN_SUCCESS;
4803 }
4804 if(user_page_list)
4805 user_page_list[0].device = FALSE;
4806
4807 if(cntrl_flags & UPL_SET_LITE) {
4808 upl->map_object = object;
4809 } else {
4810 upl->map_object = vm_object_allocate(size);
4811 vm_object_lock(upl->map_object);
4812 upl->map_object->shadow = object;
4813 upl->map_object->pageout = TRUE;
4814 upl->map_object->can_persist = FALSE;
4815 upl->map_object->copy_strategy =
4816 MEMORY_OBJECT_COPY_NONE;
4817 upl->map_object->shadow_offset = offset;
4818 upl->map_object->wimg_bits = object->wimg_bits;
4819 vm_object_unlock(upl->map_object);
4820 }
4821 }
4822 vm_object_lock(object);
4823 vm_object_paging_begin(object);
4824
4825 if (!object->phys_contiguous) {
4826 /* Protect user space from future COW operations */
4827 object->true_share = TRUE;
4828 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4829 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4830 }
4831
4832 /* we can lock the upl offset now that paging_in_progress is set */
4833 if(upl_ptr) {
4834 upl->size = size;
4835 upl->offset = offset + object->paging_offset;
4836 *upl_ptr = upl;
4837 #ifdef UPL_DEBUG
4838 queue_enter(&object->uplq, upl, upl_t, uplq);
4839 #endif /* UPL_DEBUG */
4840 }
4841
4842 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4843 /*
4844 * The user requested that access to the pages in this UPL
4845 * be blocked until the UPL is committed or aborted.
4846 */
4847 upl->flags |= UPL_ACCESS_BLOCKED;
4848 }
4849
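/*
 * Wire each page of the requested range into the UPL.  Pages that are
 * missing, busy, encrypted or otherwise unusable are first brought in
 * (and decrypted) through vm_fault_page().  Each wired page is then
 * recorded either in the lite list bitmap or via a private alias page
 * inserted into the UPL's map object, and its state is reported in
 * user_page_list if the caller asked for one.
 */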
4850 entry = 0;
4851 while (xfer_size) {
4852 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4853 if (delayed_unlock) {
4854 delayed_unlock = 0;
4855 vm_page_unlock_queues();
4856 }
4857 vm_object_unlock(object);
4858 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4859 vm_object_lock(object);
4860 }
4861 dst_page = vm_page_lookup(object, dst_offset);
4862
4863 /*
4864 * ENCRYPTED SWAP:
4865 * If the page is encrypted, we need to decrypt it,
4866 * so force a soft page fault.
4867 */
4868 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4869 (dst_page->encrypted) ||
4870 (dst_page->unusual && (dst_page->error ||
4871 dst_page->restart ||
4872 dst_page->absent ||
4873 dst_page->fictitious ||
4874 (prot & dst_page->page_lock)))) {
4875 vm_fault_return_t result;
4876 do {
4877 vm_page_t top_page;
4878 kern_return_t error_code;
4879 int interruptible;
4880
4881 vm_object_offset_t lo_offset = offset;
4882 vm_object_offset_t hi_offset = offset + size;
4883
4884
4885 if (delayed_unlock) {
4886 delayed_unlock = 0;
4887 vm_page_unlock_queues();
4888 }
4889
4890 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4891 interruptible = THREAD_ABORTSAFE;
4892 } else {
4893 interruptible = THREAD_UNINT;
4894 }
4895
4896 result = vm_fault_page(object, dst_offset,
4897 prot | VM_PROT_WRITE, FALSE,
4898 interruptible,
4899 lo_offset, hi_offset,
4900 VM_BEHAVIOR_SEQUENTIAL,
4901 &prot, &dst_page, &top_page,
4902 (int *)0,
4903 &error_code, no_zero_fill, FALSE, NULL, 0);
4904
4905 switch(result) {
4906 case VM_FAULT_SUCCESS:
4907
4908 PAGE_WAKEUP_DONE(dst_page);
4909
4910 /*
4911 * Release paging references and
4912 * top-level placeholder page, if any.
4913 */
4914
4915 if(top_page != VM_PAGE_NULL) {
4916 vm_object_t local_object;
4917 local_object =
4918 top_page->object;
4919 if(top_page->object
4920 != dst_page->object) {
4921 vm_object_lock(
4922 local_object);
4923 VM_PAGE_FREE(top_page);
4924 vm_object_paging_end(
4925 local_object);
4926 vm_object_unlock(
4927 local_object);
4928 } else {
4929 VM_PAGE_FREE(top_page);
4930 vm_object_paging_end(
4931 local_object);
4932 }
4933 }
4934
4935 break;
4936
4937
4938 case VM_FAULT_RETRY:
4939 vm_object_lock(object);
4940 vm_object_paging_begin(object);
4941 break;
4942
4943 case VM_FAULT_FICTITIOUS_SHORTAGE:
4944 vm_page_more_fictitious();
4945 vm_object_lock(object);
4946 vm_object_paging_begin(object);
4947 break;
4948
4949 case VM_FAULT_MEMORY_SHORTAGE:
4950 if (vm_page_wait(interruptible)) {
4951 vm_object_lock(object);
4952 vm_object_paging_begin(object);
4953 break;
4954 }
4955 /* fall thru */
4956
4957 case VM_FAULT_INTERRUPTED:
4958 error_code = MACH_SEND_INTERRUPTED;
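/* no break: fall through to the common error cleanup below */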
4959 case VM_FAULT_MEMORY_ERROR:
4960 ret = (error_code ? error_code:
4961 KERN_MEMORY_ERROR);
4962 vm_object_lock(object);
4963 for(; offset < dst_offset;
4964 offset += PAGE_SIZE) {
4965 dst_page = vm_page_lookup(
4966 object, offset);
4967 if(dst_page == VM_PAGE_NULL)
4968 panic("vm_object_iopl_request: Wired pages missing. \n");
4969 vm_page_lock_queues();
4970 vm_page_unwire(dst_page);
4971 vm_page_unlock_queues();
4972 VM_STAT(reactivations++);
4973 }
4974 vm_object_unlock(object);
4975 upl_destroy(upl);
4976 return ret;
4977 }
4978 } while ((result != VM_FAULT_SUCCESS)
4979 || (result == VM_FAULT_INTERRUPTED));
4980 }
4981 if (delayed_unlock == 0)
4982 vm_page_lock_queues();
4983 vm_page_wire(dst_page);
4984
4985 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4986 /*
4987 * Mark the page "busy" to block any future page fault
4988 * on this page. We'll also remove the mapping
4989 * of all these pages before leaving this routine.
4990 */
4991 assert(!dst_page->fictitious);
4992 dst_page->busy = TRUE;
4993 }
4994
4995 if (upl_ptr) {
4996 if (cntrl_flags & UPL_SET_LITE) {
4997 int pg_num;
4998 pg_num = (dst_offset-offset)/PAGE_SIZE;
4999 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5000 } else {
5001 /*
5002 * Convert the fictitious page to a
5003 * private shadow of the real page.
5004 */
5005 assert(alias_page->fictitious);
5006 alias_page->fictitious = FALSE;
5007 alias_page->private = TRUE;
5008 alias_page->pageout = TRUE;
5009 alias_page->phys_page = dst_page->phys_page;
5010 vm_page_wire(alias_page);
5011
5012 vm_page_insert(alias_page,
5013 upl->map_object, size - xfer_size);
5014 assert(!alias_page->wanted);
5015 alias_page->busy = FALSE;
5016 alias_page->absent = FALSE;
5017 }
5018
5019 /* expect the page to be used */
5020 dst_page->reference = TRUE;
5021
5022 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5023 dst_page->dirty = TRUE;
5024 alias_page = NULL;
5025
5026 if (user_page_list) {
5027 user_page_list[entry].phys_addr
5028 = dst_page->phys_page;
5029 user_page_list[entry].dirty =
5030 dst_page->dirty;
5031 user_page_list[entry].pageout =
5032 dst_page->pageout;
5033 user_page_list[entry].absent =
5034 dst_page->absent;
5035 user_page_list[entry].precious =
5036 dst_page->precious;
5037 }
5038 }
5039 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
5040 delayed_unlock = 0;
5041 vm_page_unlock_queues();
5042 }
5043 entry++;
5044 dst_offset += PAGE_SIZE_64;
5045 xfer_size -= PAGE_SIZE;
5046 }
5047 if (delayed_unlock)
5048 vm_page_unlock_queues();
5049
5050 if (upl->flags & UPL_INTERNAL) {
5051 if(page_list_count != NULL)
5052 *page_list_count = 0;
5053 } else if (page_list_count != NULL &&
5054 *page_list_count > entry) {
5055 *page_list_count = entry;
5056 }
5057
5058 if (alias_page != NULL) {
5059 vm_page_lock_queues();
5060 vm_page_free(alias_page);
5061 vm_page_unlock_queues();
5062 }
5063
5064 vm_object_unlock(object);
5065
5066 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5067 /*
5068 * We've marked all the pages "busy" so that future
5069 * page faults will block.
5070 * Now remove the mapping for these pages, so that they
5071 * can't be accessed without causing a page fault.
5072 */
5073 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5074 PMAP_NULL, 0, VM_PROT_NONE);
5075 }
5076
5077 return KERN_SUCCESS;
5078 }
5079
5080 kern_return_t
5081 upl_transpose(
5082 upl_t upl1,
5083 upl_t upl2)
5084 {
5085 kern_return_t retval;
5086 boolean_t upls_locked;
5087 vm_object_t object1, object2;
5088
5089 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5090 return KERN_INVALID_ARGUMENT;
5091 }
5092
5093 upls_locked = FALSE;
5094
5095 /*
5096 * Since we need to lock both UPLs at the same time,
5097 * avoid deadlocks by always taking locks in the same order.
5098 */
5099 if (upl1 < upl2) {
5100 upl_lock(upl1);
5101 upl_lock(upl2);
5102 } else {
5103 upl_lock(upl2);
5104 upl_lock(upl1);
5105 }
5106 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5107
5108 object1 = upl1->map_object;
5109 object2 = upl2->map_object;
5110
5111 if (upl1->offset != 0 || upl2->offset != 0 ||
5112 upl1->size != upl2->size) {
5113 /*
5114 * We deal only with full objects, not subsets.
5115 * That's because we exchange the entire backing store info
5116 * for the objects: pager, resident pages, etc... We can't do
5117 * only part of it.
5118 */
5119 retval = KERN_INVALID_VALUE;
5120 goto done;
5121 }
5122
5123 /*
5124 * Transpose the VM objects' backing store.
5125 */
5126 retval = vm_object_transpose(object1, object2,
5127 (vm_object_size_t) upl1->size);
5128
5129 if (retval == KERN_SUCCESS) {
5130 /*
5131 * Make each UPL point to the correct VM object, i.e. the
5132 * object holding the pages that the UPL refers to...
5133 */
5134 upl1->map_object = object2;
5135 upl2->map_object = object1;
5136 }
5137
5138 done:
5139 /*
5140 * Cleanup.
5141 */
5142 if (upls_locked) {
5143 upl_unlock(upl1);
5144 upl_unlock(upl2);
5145 upls_locked = FALSE;
5146 }
5147
5148 return retval;
5149 }
5150
5151 /*
5152 * ENCRYPTED SWAP:
5153 *
5154 * Rationale: the user might have some encrypted data on disk (via
5155 * FileVault or any other mechanism). That data is then decrypted in
5156 * memory, which is safe as long as the machine is secure. But that
5157 * decrypted data in memory could be paged out to disk by the default
5158 * pager. The data would then be stored on disk in clear (not encrypted)
5159 * and it could be accessed by anyone who gets physical access to the
5160 * disk (if the laptop or the disk gets stolen for example). This weakens
5161 * the security offered by FileVault.
5162 *
5163 * Solution: the default pager will optionally request that all the
5164 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5165 * before it sends this UPL to disk via the vnode_pageout() path.
5166 *
5167 * Notes:
5168 *
5169 * To avoid disrupting the VM LRU algorithms, we want to keep the
5170 * clean-in-place mechanisms, which allow us to send some extra pages to
5171 * swap (clustering) without actually removing them from the user's
5172 * address space. We don't want the user to unknowingly access encrypted
5173 * data, so we have to actually remove the encrypted pages from the page
5174 * table. When the user accesses the data, the hardware will fail to
5175 * locate the virtual page in its page table and will trigger a page
5176 * fault. We can then decrypt the page and enter it in the page table
5177 * again. Whenever we allow the user to access the contents of a page,
5178 * we have to make sure it's not encrypted.
5179 *
5180 *
5181 */
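/*
 * In this file, that scheme maps onto: the default pager calls
 * upl_encrypt() on the UPLs it is about to push out, which maps each
 * page into the kernel via vm_paging_map_object() and encrypts it with
 * vm_page_encrypt(); the page-fault path later calls vm_page_decrypt()
 * when an "encrypted" page is touched again.
 */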
5182 /*
5183 * ENCRYPTED SWAP:
5184 * Reserve of virtual addresses in the kernel address space.
5185 * We need to map the physical pages in the kernel, so that we
5186 * can call the encryption/decryption routines with a kernel
5187 * virtual address. We keep this pool of pre-allocated kernel
5188 * virtual addresses so that we don't have to scan the kernel's
5189 * virtual address space each time we need to encrypt or decrypt
5190 * a physical page.
5191 * It would be nice to be able to encrypt and decrypt in physical
5192 * mode but that might not always be more efficient...
5193 */
5194 decl_simple_lock_data(,vm_paging_lock)
5195 #define VM_PAGING_NUM_PAGES 64
5196 vm_map_offset_t vm_paging_base_address = 0;
5197 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5198 int vm_paging_max_index = 0;
5199 unsigned long vm_paging_no_kernel_page = 0;
5200 unsigned long vm_paging_objects_mapped = 0;
5201 unsigned long vm_paging_pages_mapped = 0;
5202 unsigned long vm_paging_objects_mapped_slow = 0;
5203 unsigned long vm_paging_pages_mapped_slow = 0;
5204
5205 /*
5206 * ENCRYPTED SWAP:
5207 * vm_paging_map_object:
5208 * Maps part of a VM object's pages in the kernel
5209 * virtual address space, using the pre-allocated
5210 * kernel virtual addresses, if possible.
5211 * Context:
5212 * The VM object is locked. This lock will get
5213 * dropped and re-acquired though.
5214 */
5215 kern_return_t
5216 vm_paging_map_object(
5217 vm_map_offset_t *address,
5218 vm_page_t page,
5219 vm_object_t object,
5220 vm_object_offset_t offset,
5221 vm_map_size_t *size)
5222 {
5223 kern_return_t kr;
5224 vm_map_offset_t page_map_offset;
5225 vm_map_size_t map_size;
5226 vm_object_offset_t object_offset;
5227 #ifdef __ppc__
5228 int i;
5229 vm_map_entry_t map_entry;
5230 #endif /* __ppc__ */
5231
5232
5233 #ifdef __ppc__
5234 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5235 /*
5236 * Optimization for the PowerPC.
5237 * Use one of the pre-allocated kernel virtual addresses
5238 * and just enter the VM page in the kernel address space
5239 * at that virtual address.
5240 */
5241 vm_object_unlock(object);
5242 simple_lock(&vm_paging_lock);
5243
5244 if (vm_paging_base_address == 0) {
5245 /*
5246 * Initialize our pool of pre-allocated kernel
5247 * virtual addresses.
5248 */
5249 simple_unlock(&vm_paging_lock);
5250 page_map_offset = 0;
5251 kr = vm_map_find_space(kernel_map,
5252 &page_map_offset,
5253 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5254 0,
5255 &map_entry);
5256 if (kr != KERN_SUCCESS) {
5257 panic("vm_paging_map_object: "
5258 "kernel_map full\n");
5259 }
5260 map_entry->object.vm_object = kernel_object;
5261 map_entry->offset =
5262 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5263 vm_object_reference(kernel_object);
5264 vm_map_unlock(kernel_map);
5265
5266 simple_lock(&vm_paging_lock);
5267 if (vm_paging_base_address != 0) {
5268 /* someone raced us and won: undo */
5269 simple_unlock(&vm_paging_lock);
5270 kr = vm_map_remove(kernel_map,
5271 page_map_offset,
5272 page_map_offset +
5273 (VM_PAGING_NUM_PAGES
5274 * PAGE_SIZE),
5275 VM_MAP_NO_FLAGS);
5276 assert(kr == KERN_SUCCESS);
5277 simple_lock(&vm_paging_lock);
5278 } else {
5279 vm_paging_base_address = page_map_offset;
5280 }
5281 }
5282
5283 /*
5284 * Try and find an available kernel virtual address
5285 * from our pre-allocated pool.
5286 */
5287 page_map_offset = 0;
5288 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5289 if (vm_paging_page_inuse[i] == FALSE) {
5290 page_map_offset = vm_paging_base_address +
5291 (i * PAGE_SIZE);
5292 break;
5293 }
5294 }
5295
5296 if (page_map_offset != 0) {
5297 /*
5298 * We found a kernel virtual address;
5299 * map the physical page to that virtual address.
5300 */
5301 if (i > vm_paging_max_index) {
5302 vm_paging_max_index = i;
5303 }
5304 vm_paging_page_inuse[i] = TRUE;
5305 simple_unlock(&vm_paging_lock);
5306 pmap_map_block(kernel_pmap,
5307 page_map_offset,
5308 page->phys_page,
5309 1, /* Size is number of 4k pages */
5310 VM_PROT_DEFAULT,
5311 ((int) page->object->wimg_bits &
5312 VM_WIMG_MASK),
5313 0);
5314 vm_paging_objects_mapped++;
5315 vm_paging_pages_mapped++;
5316 *address = page_map_offset;
5317 vm_object_lock(object);
5318
5319 /* all done and mapped, ready to use ! */
5320 return KERN_SUCCESS;
5321 }
5322
5323 /*
5324 * We ran out of pre-allocated kernel virtual
5325 * addresses. Just map the page in the kernel
5326 * the slow and regular way.
5327 */
5328 vm_paging_no_kernel_page++;
5329 simple_unlock(&vm_paging_lock);
5330 vm_object_lock(object);
5331 }
5332 #endif /* __ppc__ */
5333
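/*
 * Slow path (and the only path on non-PowerPC configurations): map the
 * requested range of the object into kernel_map with vm_map_enter(),
 * then explicitly enter each resident page into the kernel pmap below.
 */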
5334 object_offset = vm_object_trunc_page(offset);
5335 map_size = vm_map_round_page(*size);
5336
5337 /*
5338 * Try and map the required range of the object
5339 * in the kernel_map
5340 */
5341
5342 /* don't go beyond the object's end... */
5343 if (object_offset >= object->size) {
5344 map_size = 0;
5345 } else if (map_size > object->size - offset) {
5346 map_size = object->size - offset;
5347 }
5348
5349 vm_object_reference_locked(object); /* for the map entry */
5350 vm_object_unlock(object);
5351
5352 kr = vm_map_enter(kernel_map,
5353 address,
5354 map_size,
5355 0,
5356 VM_FLAGS_ANYWHERE,
5357 object,
5358 object_offset,
5359 FALSE,
5360 VM_PROT_DEFAULT,
5361 VM_PROT_ALL,
5362 VM_INHERIT_NONE);
5363 if (kr != KERN_SUCCESS) {
5364 *address = 0;
5365 *size = 0;
5366 vm_object_deallocate(object); /* for the map entry */
5367 return kr;
5368 }
5369
5370 *size = map_size;
5371
5372 /*
5373 * Enter the mapped pages in the page table now.
5374 */
5375 vm_object_lock(object);
5376 for (page_map_offset = 0;
5377 map_size != 0;
5378 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5379 unsigned int cache_attr;
5380
5381 page = vm_page_lookup(object, offset + page_map_offset);
5382 if (page == VM_PAGE_NULL) {
5383 panic("vm_paging_map_object: no page !?");
5384 }
5385 if (page->no_isync == TRUE) {
5386 pmap_sync_page_data_phys(page->phys_page);
5387 }
5388 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5389
5390 PMAP_ENTER(kernel_pmap,
5391 *address + page_map_offset,
5392 page,
5393 VM_PROT_DEFAULT,
5394 cache_attr,
5395 FALSE);
5396 }
5397
5398 vm_paging_objects_mapped_slow++;
5399 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5400
5401 return KERN_SUCCESS;
5402 }
5403
5404 /*
5405 * ENCRYPTED SWAP:
5406 * vm_paging_unmap_object:
5407 * Unmaps part of a VM object's pages from the kernel
5408 * virtual address space.
5409 * Context:
5410 * The VM object is locked. This lock will get
5411 * dropped and re-acquired though.
5412 */
5413 void
5414 vm_paging_unmap_object(
5415 vm_object_t object,
5416 vm_map_offset_t start,
5417 vm_map_offset_t end)
5418 {
5419 kern_return_t kr;
5420 #ifdef __ppc__
5421 int i;
5422 #endif /* __ppc__ */
5423
5424 if ((vm_paging_base_address != 0) &&
5425 ((start < vm_paging_base_address) ||
5426 (end > (vm_paging_base_address
5427 + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) {
5428 /*
5429 * We didn't use our pre-allocated pool of
5430 * kernel virtual address. Deallocate the
5431 * virtual memory.
5432 */
5433 if (object != VM_OBJECT_NULL) {
5434 vm_object_unlock(object);
5435 }
5436 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5437 if (object != VM_OBJECT_NULL) {
5438 vm_object_lock(object);
5439 }
5440 assert(kr == KERN_SUCCESS);
5441 } else {
5442 /*
5443 * We used a kernel virtual address from our
5444 * pre-allocated pool. Put it back in the pool
5445 * for next time.
5446 */
5447 #ifdef __ppc__
5448 assert(end - start == PAGE_SIZE);
5449 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5450
5451 /* undo the pmap mapping */
5452 mapping_remove(kernel_pmap, start);
5453
5454 simple_lock(&vm_paging_lock);
5455 vm_paging_page_inuse[i] = FALSE;
5456 simple_unlock(&vm_paging_lock);
5457 #endif /* __ppc__ */
5458 }
5459 }
5460
5461 /*
5462 * Encryption data.
5463 * "iv" is the "initial vector". Ideally, we want to
5464 * have a different one for each page we encrypt, so that
5465 * crackers can't find encryption patterns too easily.
5466 */
5467 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5468 boolean_t swap_crypt_ctx_initialized = FALSE;
5469 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5470 aes_ctx swap_crypt_ctx;
5471 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5472
5473 #if DEBUG
5474 boolean_t swap_crypt_ctx_tested = FALSE;
5475 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5476 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5477 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5478 #endif /* DEBUG */
5479
5480 extern u_long random(void);
5481
5482 /*
5483 * Initialize the encryption context: key and key size.
5484 */
5485 void swap_crypt_ctx_initialize(void); /* forward */
5486 void
5487 swap_crypt_ctx_initialize(void)
5488 {
5489 unsigned int i;
5490
5491 /*
5492 * No need for locking to protect swap_crypt_ctx_initialized
5493 * because the first use of encryption will come from the
5494 * pageout thread (we won't pagein before there's been a pageout)
5495 * and there's only one pageout thread.
5496 */
5497 if (swap_crypt_ctx_initialized == FALSE) {
5498 for (i = 0;
5499 i < (sizeof (swap_crypt_key) /
5500 sizeof (swap_crypt_key[0]));
5501 i++) {
5502 swap_crypt_key[i] = random();
5503 }
5504 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5505 SWAP_CRYPT_AES_KEY_SIZE,
5506 &swap_crypt_ctx.encrypt);
5507 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5508 SWAP_CRYPT_AES_KEY_SIZE,
5509 &swap_crypt_ctx.decrypt);
5510 swap_crypt_ctx_initialized = TRUE;
5511 }
5512
5513 #if DEBUG
5514 /*
5515 * Validate the encryption algorithms.
5516 */
5517 if (swap_crypt_ctx_tested == FALSE) {
5518 /* initialize */
5519 for (i = 0; i < 4096; i++) {
5520 swap_crypt_test_page_ref[i] = (char) i;
5521 }
5522 /* encrypt */
5523 aes_encrypt_cbc(swap_crypt_test_page_ref,
5524 swap_crypt_null_iv,
5525 PAGE_SIZE / AES_BLOCK_SIZE,
5526 swap_crypt_test_page_encrypt,
5527 &swap_crypt_ctx.encrypt);
5528 /* decrypt */
5529 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5530 swap_crypt_null_iv,
5531 PAGE_SIZE / AES_BLOCK_SIZE,
5532 swap_crypt_test_page_decrypt,
5533 &swap_crypt_ctx.decrypt);
5534 /* compare result with original */
5535 for (i = 0; i < 4096; i ++) {
5536 if (swap_crypt_test_page_decrypt[i] !=
5537 swap_crypt_test_page_ref[i]) {
5538 panic("encryption test failed");
5539 }
5540 }
5541
5542 /* encrypt again */
5543 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5544 swap_crypt_null_iv,
5545 PAGE_SIZE / AES_BLOCK_SIZE,
5546 swap_crypt_test_page_decrypt,
5547 &swap_crypt_ctx.encrypt);
5548 /* decrypt in place */
5549 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5550 swap_crypt_null_iv,
5551 PAGE_SIZE / AES_BLOCK_SIZE,
5552 swap_crypt_test_page_decrypt,
5553 &swap_crypt_ctx.decrypt);
5554 for (i = 0; i < 4096; i ++) {
5555 if (swap_crypt_test_page_decrypt[i] !=
5556 swap_crypt_test_page_ref[i]) {
5557 panic("in place encryption test failed");
5558 }
5559 }
5560
5561 swap_crypt_ctx_tested = TRUE;
5562 }
5563 #endif /* DEBUG */
5564 }
5565
5566 /*
5567 * ENCRYPTED SWAP:
5568 * vm_page_encrypt:
5569 * Encrypt the given page, for secure paging.
5570 * The page might already be mapped at kernel virtual
5571 * address "kernel_mapping_offset". Otherwise, we need
5572 * to map it.
5573 *
5574 * Context:
5575 * The page's object is locked, but this lock will be released
5576 * and re-acquired.
5577 * The page is busy and not accessible by users (not entered in any pmap).
5578 */
5579 void
5580 vm_page_encrypt(
5581 vm_page_t page,
5582 vm_map_offset_t kernel_mapping_offset)
5583 {
5584 int clear_refmod = 0;
5585 kern_return_t kr;
5586 boolean_t page_was_referenced;
5587 boolean_t page_was_modified;
5588 vm_map_size_t kernel_mapping_size;
5589 vm_offset_t kernel_vaddr;
5590 union {
5591 unsigned char aes_iv[AES_BLOCK_SIZE];
5592 struct {
5593 memory_object_t pager_object;
5594 vm_object_offset_t paging_offset;
5595 } vm;
5596 } encrypt_iv;
5597
5598 if (! vm_pages_encrypted) {
5599 vm_pages_encrypted = TRUE;
5600 }
5601
5602 assert(page->busy);
5603 assert(page->dirty || page->precious);
5604
5605 if (page->encrypted) {
5606 /*
5607 * Already encrypted: no need to do it again.
5608 */
5609 vm_page_encrypt_already_encrypted_counter++;
5610 return;
5611 }
5612 ASSERT_PAGE_DECRYPTED(page);
5613
5614 /*
5615 * Gather the "reference" and "modified" status of the page.
5616 * We'll restore these values after the encryption, so that
5617 * the encryption is transparent to the rest of the system
5618 * and doesn't impact the VM's LRU logic.
5619 */
5620 page_was_referenced =
5621 (page->reference || pmap_is_referenced(page->phys_page));
5622 page_was_modified =
5623 (page->dirty || pmap_is_modified(page->phys_page));
5624
5625 if (kernel_mapping_offset == 0) {
5626 /*
5627 * The page hasn't already been mapped in kernel space
5628 * by the caller. Map it now, so that we can access
5629 * its contents and encrypt them.
5630 */
5631 kernel_mapping_size = PAGE_SIZE;
5632 kr = vm_paging_map_object(&kernel_mapping_offset,
5633 page,
5634 page->object,
5635 page->offset,
5636 &kernel_mapping_size);
5637 if (kr != KERN_SUCCESS) {
5638 panic("vm_page_encrypt: "
5639 "could not map page in kernel: 0x%x\n",
5640 kr);
5641 }
5642 } else {
5643 kernel_mapping_size = 0;
5644 }
5645 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5646
5647 if (swap_crypt_ctx_initialized == FALSE) {
5648 swap_crypt_ctx_initialize();
5649 }
5650 assert(swap_crypt_ctx_initialized);
5651
5652 /*
5653 * Prepare an "initial vector" for the encryption.
5654 * We use the "pager" and the "paging_offset" for that
5655 * page to obfuscate the encrypted data a bit more and
5656 * prevent crackers from finding patterns that they could
5657 * use to break the key.
5658 */
5659 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5660 encrypt_iv.vm.pager_object = page->object->pager;
5661 encrypt_iv.vm.paging_offset =
5662 page->object->paging_offset + page->offset;
5663
5664 vm_object_unlock(page->object);
5665
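/*
 * The (pager, paging_offset) pair is itself run through AES first,
 * presumably so that the IV actually used for the page is a full,
 * unpredictable block rather than two easily guessable fields.
 */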
5666 /* encrypt the "initial vector" */
5667 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5668 swap_crypt_null_iv,
5669 1,
5670 &encrypt_iv.aes_iv[0],
5671 &swap_crypt_ctx.encrypt);
5672
5673 /*
5674 * Encrypt the page.
5675 */
5676 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5677 &encrypt_iv.aes_iv[0],
5678 PAGE_SIZE / AES_BLOCK_SIZE,
5679 (unsigned char *) kernel_vaddr,
5680 &swap_crypt_ctx.encrypt);
5681
5682 vm_page_encrypt_counter++;
5683
5684 vm_object_lock(page->object);
5685
5686 /*
5687 * Unmap the page from the kernel's address space,
5688 * if we had to map it ourselves. Otherwise, let
5689 * the caller undo the mapping if needed.
5690 */
5691 if (kernel_mapping_size != 0) {
5692 vm_paging_unmap_object(page->object,
5693 kernel_mapping_offset,
5694 kernel_mapping_offset + kernel_mapping_size);
5695 }
5696
5697 /*
5698 * Restore the "reference" and "modified" bits.
5699 * This should clean up any impact the encryption had
5700 * on them.
5701 */
5702 if (! page_was_referenced) {
5703 clear_refmod |= VM_MEM_REFERENCED;
5704 page->reference = FALSE;
5705 }
5706 if (! page_was_modified) {
5707 clear_refmod |= VM_MEM_MODIFIED;
5708 page->dirty = FALSE;
5709 }
5710 if (clear_refmod)
5711 pmap_clear_refmod(page->phys_page, clear_refmod);
5712
5713 page->encrypted = TRUE;
5714 }
5715
5716 /*
5717 * ENCRYPTED SWAP:
5718 * vm_page_decrypt:
5719 * Decrypt the given page.
5720 * The page might already be mapped at kernel virtual
5721 * address "kernel_mapping_offset". Otherwise, we need
5722 * to map it.
5723 *
5724 * Context:
5725 * The page's VM object is locked but will be unlocked and relocked.
5726 * The page is busy and not accessible by users (not entered in any pmap).
5727 */
5728 void
5729 vm_page_decrypt(
5730 vm_page_t page,
5731 vm_map_offset_t kernel_mapping_offset)
5732 {
5733 int clear_refmod = 0;
5734 kern_return_t kr;
5735 vm_map_size_t kernel_mapping_size;
5736 vm_offset_t kernel_vaddr;
5737 boolean_t page_was_referenced;
5738 union {
5739 unsigned char aes_iv[AES_BLOCK_SIZE];
5740 struct {
5741 memory_object_t pager_object;
5742 vm_object_offset_t paging_offset;
5743 } vm;
5744 } decrypt_iv;
5745
5746 assert(page->busy);
5747 assert(page->encrypted);
5748
5749 /*
5750 * Gather the "reference" status of the page.
5751 * We'll restore its value after the decryption, so that
5752 * the decryption is transparent to the rest of the system
5753 * and doesn't impact the VM's LRU logic.
5754 */
5755 page_was_referenced =
5756 (page->reference || pmap_is_referenced(page->phys_page));
5757
5758 if (kernel_mapping_offset == 0) {
5759 /*
5760 * The page hasn't already been mapped in kernel space
5761 * by the caller. Map it now, so that we can access
5762 * its contents and decrypt them.
5763 */
5764 kernel_mapping_size = PAGE_SIZE;
5765 kr = vm_paging_map_object(&kernel_mapping_offset,
5766 page,
5767 page->object,
5768 page->offset,
5769 &kernel_mapping_size);
5770 if (kr != KERN_SUCCESS) {
5771 panic("vm_page_decrypt: "
5772 "could not map page in kernel: 0x%x\n", kr);
5773 }
5774 } else {
5775 kernel_mapping_size = 0;
5776 }
5777 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5778
5779 assert(swap_crypt_ctx_initialized);
5780
5781 /*
5782 * Prepare an "initial vector" for the decryption.
5783 * It has to be the same as the "initial vector" we
5784 * used to encrypt that page.
5785 */
5786 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5787 decrypt_iv.vm.pager_object = page->object->pager;
5788 decrypt_iv.vm.paging_offset =
5789 page->object->paging_offset + page->offset;
5790
5791 vm_object_unlock(page->object);
5792
5793 /* encrypt the "initial vector" */
5794 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5795 swap_crypt_null_iv,
5796 1,
5797 &decrypt_iv.aes_iv[0],
5798 &swap_crypt_ctx.encrypt);
5799
5800 /*
5801 * Decrypt the page.
5802 */
5803 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5804 &decrypt_iv.aes_iv[0],
5805 PAGE_SIZE / AES_BLOCK_SIZE,
5806 (unsigned char *) kernel_vaddr,
5807 &swap_crypt_ctx.decrypt);
5808 vm_page_decrypt_counter++;
5809
5810 vm_object_lock(page->object);
5811
5812 /*
5813 * Unmap the page from the kernel's address space,
5814 * if we had to map it ourselves. Otherwise, let
5815 * the caller undo the mapping if needed.
5816 */
5817 if (kernel_mapping_size != 0) {
5818 vm_paging_unmap_object(page->object,
5819 kernel_vaddr,
5820 kernel_vaddr + PAGE_SIZE);
5821 }
5822
5823 /*
5824 * After decryption, the page is actually clean.
5825 * It was encrypted as part of paging, which "cleans"
5826 * the "dirty" pages.
5827 * No one could access it after it was encrypted
5828 * and the decryption doesn't count.
5829 */
5830 page->dirty = FALSE;
5831 clear_refmod = VM_MEM_MODIFIED;
5832
5833 /* restore the "reference" bit */
5834 if (! page_was_referenced) {
5835 page->reference = FALSE;
5836 clear_refmod |= VM_MEM_REFERENCED;
5837 }
5838 pmap_clear_refmod(page->phys_page, clear_refmod);
5839
5840 page->encrypted = FALSE;
5841
5842 /*
5843 * We've just modified the page's contents via the data cache and part
5844 * of the new contents might still be in the cache and not yet in RAM.
5845 * Since the page is now available and might get gathered in a UPL to
5846 * be part of a DMA transfer from a driver that expects the memory to
5847 * be coherent at this point, we have to flush the data cache.
5848 */
5849 pmap_sync_page_data_phys(page->phys_page);
5850 /*
5851 * Since the page is not mapped yet, some code might assume that it
5852 * doesn't need to invalidate the instruction cache when writing to
5853 * that page. That code relies on "no_isync" being set, so that the
5854 * caches get synchronized when the page is first mapped. So we need
5855 * to set "no_isync" here too, despite the fact that we just
5856 * synchronized the caches above...
5857 */
5858 page->no_isync = TRUE;
5859 }
5860
5861 unsigned long upl_encrypt_upls = 0;
5862 unsigned long upl_encrypt_pages = 0;
5863
5864 /*
5865 * ENCRYPTED SWAP:
5866 *
5867 * upl_encrypt:
5868 * Encrypts all the pages in the UPL, within the specified range.
5869 *
5870 */
5871 void
5872 upl_encrypt(
5873 upl_t upl,
5874 upl_offset_t crypt_offset,
5875 upl_size_t crypt_size)
5876 {
5877 upl_size_t upl_size;
5878 upl_offset_t upl_offset;
5879 vm_object_t upl_object;
5880 vm_page_t page;
5881 vm_object_t shadow_object;
5882 vm_object_offset_t shadow_offset;
5883 vm_object_offset_t paging_offset;
5884 vm_object_offset_t base_offset;
5885
5886 upl_encrypt_upls++;
5887 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5888
5889 upl_lock(upl);
5890
5891 upl_object = upl->map_object;
5892 upl_offset = upl->offset;
5893 upl_size = upl->size;
5894
5895 upl_unlock(upl);
5896
5897 vm_object_lock(upl_object);
5898
5899 /*
5900 * Find the VM object that contains the actual pages.
5901 */
5902 if (upl_object->pageout) {
5903 shadow_object = upl_object->shadow;
5904 /*
5905 * The offset in the shadow object is actually also
5906 * accounted for in upl->offset. It possibly shouldn't be
5907 * this way, but for now don't account for it twice.
5908 */
5909 shadow_offset = 0;
5910 assert(upl_object->paging_offset == 0); /* XXX ? */
5911 vm_object_lock(shadow_object);
5912 } else {
5913 shadow_object = upl_object;
5914 shadow_offset = 0;
5915 }
5916
5917 paging_offset = shadow_object->paging_offset;
5918 vm_object_paging_begin(shadow_object);
5919
5920 if (shadow_object != upl_object) {
5921 vm_object_unlock(shadow_object);
5922 }
5923 vm_object_unlock(upl_object);
5924
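/*
 * Compute where the pages to encrypt live in the shadow object:
 * upl->offset already includes the object's paging_offset, so add the
 * offset within the UPL and then subtract paging_offset to get back to
 * an offset in the shadow object's page space.
 */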
5925 base_offset = shadow_offset;
5926 base_offset += upl_offset;
5927 base_offset += crypt_offset;
5928 base_offset -= paging_offset;
5929 /*
5930 * Unmap the pages, so that nobody can continue accessing them while
5931 * they're encrypted. After that point, all accesses to these pages
5932 * will cause a page fault and block while the page is being encrypted
5933 * (busy). After the encryption completes, any access will cause a
5934 * page fault and the page gets decrypted at that time.
5935 */
5936 assert(crypt_offset + crypt_size <= upl_size);
5937 vm_object_pmap_protect(shadow_object,
5938 base_offset,
5939 (vm_object_size_t)crypt_size,
5940 PMAP_NULL,
5941 0,
5942 VM_PROT_NONE);
5943
5944 /* XXX FBDP could the object have changed significantly here ? */
5945 vm_object_lock(shadow_object);
5946
5947 for (upl_offset = 0;
5948 upl_offset < crypt_size;
5949 upl_offset += PAGE_SIZE) {
5950 page = vm_page_lookup(shadow_object,
5951 base_offset + upl_offset);
5952 if (page == VM_PAGE_NULL) {
5953 panic("upl_encrypt: "
5954 "no page for (obj=%p,off=%lld+%d)!\n",
5955 shadow_object,
5956 base_offset,
5957 upl_offset);
5958 }
5959 vm_page_encrypt(page, 0);
5960 }
5961
5962 vm_object_paging_end(shadow_object);
5963 vm_object_unlock(shadow_object);
5964 }
5965
5966 vm_size_t
5967 upl_get_internal_pagelist_offset(void)
5968 {
5969 return sizeof(struct upl);
5970 }
5971
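/*
 * Note: despite its name, upl_set_dirty() sets the UPL_CLEAR_DIRTY
 * flag, which causes upl_commit_range() to clear the dirty bit on the
 * pages when this UPL is committed (see UPL_COMMIT_CLEAR_DIRTY above).
 */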
5972 void
5973 upl_set_dirty(
5974 upl_t upl)
5975 {
5976 upl->flags |= UPL_CLEAR_DIRTY;
5977 }
5978
5979 void
5980 upl_clear_dirty(
5981 upl_t upl)
5982 {
5983 upl->flags &= ~UPL_CLEAR_DIRTY;
5984 }
5985
5986
5987 #ifdef MACH_BSD
5988
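/*
 * Thin accessors over the upl_page_info array: each simply wraps the
 * corresponding UPL_*_PAGE / UPL_PHYS_PAGE macro, presumably so the BSD
 * side of the kernel (hence the MACH_BSD guard) can query page state
 * without seeing the macro definitions.
 */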
5989 boolean_t upl_page_present(upl_page_info_t *upl, int index)
5990 {
5991 return(UPL_PAGE_PRESENT(upl, index));
5992 }
5993 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
5994 {
5995 return(UPL_DIRTY_PAGE(upl, index));
5996 }
5997 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
5998 {
5999 return(UPL_VALID_PAGE(upl, index));
6000 }
6001 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6002 {
6003 return(UPL_PHYS_PAGE(upl, index));
6004 }
6005
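/*
 * vm_countdirtypages:
 * Debugging aid: walks the inactive + zero-fill queues and then the
 * active queue, counting pages that are dirty, queued for pageout, or
 * precious, and prints the totals for each pass.
 */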
6006 void
6007 vm_countdirtypages(void)
6008 {
6009 vm_page_t m;
6010 int dpages;
6011 int pgopages;
6012 int precpages;
6013
6014
6015 dpages=0;
6016 pgopages=0;
6017 precpages=0;
6018
6019 vm_page_lock_queues();
6020 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6021 do {
6022 if (m == (vm_page_t) 0) break;
6023
6024 if (m->dirty) dpages++;
6025 if (m->pageout) pgopages++;
6026 if (m->precious) precpages++;
6027
6028 assert(m->object != kernel_object);
6029 m = (vm_page_t) queue_next(&m->pageq);
6030 if (m == (vm_page_t) 0) break;
6031
6032 } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
6033 vm_page_unlock_queues();
6034
6035 vm_page_lock_queues();
6036 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6037 do {
6038 if (m == (vm_page_t) 0) break;
6039
6040 if (m->dirty) dpages++;
6041 if (m->pageout) pgopages++;
6042 if (m->precious) precpages++;
6043
6044 assert(m->object != kernel_object);
6045 m = (vm_page_t) queue_next(&m->pageq);
6046 if (m == (vm_page_t) 0) break;
6047
6048 } while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
6049 vm_page_unlock_queues();
6050
6051 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6052
6053 dpages=0;
6054 pgopages=0;
6055 precpages=0;
6056
6057 vm_page_lock_queues();
6058 m = (vm_page_t) queue_first(&vm_page_queue_active);
6059
6060 do {
6061 if (m == (vm_page_t) 0) break;
6062 if (m->dirty) dpages++;
6063 if (m->pageout) pgopages++;
6064 if (m->precious) precpages++;
6065
6066 assert(m->object != kernel_object);
6067 m = (vm_page_t) queue_next(&m->pageq);
6068 if (m == (vm_page_t) 0) break;
6069
6070 } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
6071 vm_page_unlock_queues();
6072
6073 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6074
6075 }
6076 #endif /* MACH_BSD */
6077
6078 #ifdef UPL_DEBUG
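/*
 * UPL_DEBUG only: stash and retrieve two caller-supplied alias words in
 * the UPL, apparently used to tag UPLs from the UBC layer while
 * debugging.
 */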
6079 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6080 {
6081 upl->ubc_alias1 = alias1;
6082 upl->ubc_alias2 = alias2;
6083 return KERN_SUCCESS;
6084 }
6085 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6086 {
6087 if (al)
6088 *al = upl->ubc_alias1;
6089 if (al2)
6090 *al2 = upl->ubc_alias2;
6091 return KERN_SUCCESS;
6092 }
6093 #endif /* UPL_DEBUG */
6094
6095
6096
6097 #if MACH_KDB
6098 #include <ddb/db_output.h>
6099 #include <ddb/db_print.h>
6100 #include <vm/vm_print.h>
6101
6102 #define printf kdbprintf
6103 void db_pageout(void);
6104
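/*
 * db_vm:
 * Kernel debugger (ddb) command: prints the global page counts and the
 * paging targets, then hands off to db_pageout() for the pageout
 * daemon's own statistics.
 */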
6105 void
6106 db_vm(void)
6107 {
6108
6109 iprintf("VM Statistics:\n");
6110 db_indent += 2;
6111 iprintf("pages:\n");
6112 db_indent += 2;
6113 iprintf("activ %5d inact %5d free %5d",
6114 vm_page_active_count, vm_page_inactive_count,
6115 vm_page_free_count);
6116 printf(" wire %5d gobbl %5d\n",
6117 vm_page_wire_count, vm_page_gobble_count);
6118 db_indent -= 2;
6119 iprintf("target:\n");
6120 db_indent += 2;
6121 iprintf("min %5d inact %5d free %5d",
6122 vm_page_free_min, vm_page_inactive_target,
6123 vm_page_free_target);
6124 printf(" resrv %5d\n", vm_page_free_reserved);
6125 db_indent -= 2;
6126 iprintf("pause:\n");
6127 db_pageout();
6128 db_indent -= 2;
6129 }
6130
6131 #if MACH_COUNTERS
6132 extern int c_laundry_pages_freed;
6133 #endif /* MACH_COUNTERS */
6134
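/*
 * db_pageout:
 * Prints the pageout daemon's activity counters, plus the laundry
 * counter when MACH_COUNTERS is configured and the cluster/target
 * statistics when MACH_CLUSTER_STATS is configured.
 */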
6135 void
6136 db_pageout(void)
6137 {
6138 iprintf("Pageout Statistics:\n");
6139 db_indent += 2;
6140 iprintf("active %5d inactv %5d\n",
6141 vm_pageout_active, vm_pageout_inactive);
6142 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6143 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6144 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6145 iprintf("used %5d clean %5d dirty %5d\n",
6146 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6147 vm_pageout_inactive_dirty);
6148 #if MACH_COUNTERS
6149 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6150 #endif /* MACH_COUNTERS */
6151 #if MACH_CLUSTER_STATS
6152 iprintf("Cluster Statistics:\n");
6153 db_indent += 2;
6154 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6155 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6156 vm_pageout_cluster_collisions);
6157 iprintf("clusters %5d conversions %5d\n",
6158 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6159 db_indent -= 2;
6160 iprintf("Target Statistics:\n");
6161 db_indent += 2;
6162 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6163 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6164 vm_pageout_target_page_freed);
6165 db_indent -= 2;
6166 #endif /* MACH_CLUSTER_STATS */
6167 db_indent -= 2;
6168 }
6169
6170 #endif /* MACH_KDB */