1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_pageout.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 * Date: 1985
56 *
57 * The proverbial page-out daemon.
58 */
59
60 #include <stdint.h>
61
62 #include <debug.h>
63 #include <mach_pagemap.h>
64 #include <mach_cluster_stats.h>
65 #include <mach_kdb.h>
66 #include <advisory_pageout.h>
67
68 #include <mach/mach_types.h>
69 #include <mach/memory_object.h>
70 #include <mach/memory_object_default.h>
71 #include <mach/memory_object_control_server.h>
72 #include <mach/mach_host_server.h>
73 #include <mach/upl.h>
74 #include <mach/vm_map.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_statistics.h>
77
78 #include <kern/kern_types.h>
79 #include <kern/counters.h>
80 #include <kern/host_statistics.h>
81 #include <kern/machine.h>
82 #include <kern/misc_protos.h>
83 #include <kern/thread.h>
84 #include <kern/xpr.h>
85 #include <kern/kalloc.h>
86
87 #include <machine/vm_tuning.h>
88
89 #include <vm/pmap.h>
90 #include <vm/vm_fault.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_object.h>
93 #include <vm/vm_page.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_protos.h> /* must be last */
96
97 /*
98 * ENCRYPTED SWAP:
99 */
100 #ifdef __ppc__
101 #include <ppc/mappings.h>
102 #endif /* __ppc__ */
103 #include <../bsd/crypto/aes/aes.h>
104
105 extern ipc_port_t memory_manager_default;
106
107
108 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
109 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000 /* maximum iterations of the active queue to move pages to inactive */
110 #endif
111
112 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
113 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
114 #endif
115
116 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
117 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
118 #endif
119
120 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
121 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
122 #endif
123
124 #ifndef VM_PAGE_LAUNDRY_MAX
125 #define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
126 #endif /* VM_PAGE_LAUNDRY_MAX */
127
128 #ifndef VM_PAGEOUT_BURST_WAIT
129 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
130 #endif /* VM_PAGEOUT_BURST_WAIT */
131
132 #ifndef VM_PAGEOUT_EMPTY_WAIT
133 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
134 #endif /* VM_PAGEOUT_EMPTY_WAIT */
135
136 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
137 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
138 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
139
140 #ifndef VM_PAGEOUT_IDLE_WAIT
141 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
142 #endif /* VM_PAGEOUT_IDLE_WAIT */
143
144
145 /*
146 * To obtain a reasonable LRU approximation, the inactive queue
147 * needs to be large enough to give pages on it a chance to be
148 * referenced a second time. This macro defines the fraction
149 * of active+inactive pages that should be inactive.
150 * The pageout daemon uses it to update vm_page_inactive_target.
151 *
152 * If vm_page_free_count falls below vm_page_free_target and
153 * vm_page_inactive_count is below vm_page_inactive_target,
154 * then the pageout daemon starts running.
155 */
156
157 #ifndef VM_PAGE_INACTIVE_TARGET
158 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
159 #endif /* VM_PAGE_INACTIVE_TARGET */
160
161 /*
162 * Once the pageout daemon starts running, it keeps going
163 * until vm_page_free_count meets or exceeds vm_page_free_target.
164 */
165
166 #ifndef VM_PAGE_FREE_TARGET
167 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
168 #endif /* VM_PAGE_FREE_TARGET */
169
170 /*
171 * The pageout daemon always starts running once vm_page_free_count
172 * falls below vm_page_free_min.
173 */
174
175 #ifndef VM_PAGE_FREE_MIN
176 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
177 #endif /* VM_PAGE_FREE_MIN */
178
179 /*
180 * When vm_page_free_count falls below vm_page_free_reserved,
181 * only vm-privileged threads can allocate pages. vm-privilege
182 * allows the pageout daemon and default pager (and any other
183 * associated threads needed for default pageout) to continue
184 * operation by dipping into the reserved pool of pages.
185 */
186
187 #ifndef VM_PAGE_FREE_RESERVED
188 #define VM_PAGE_FREE_RESERVED(n) \
189 ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
190 #endif /* VM_PAGE_FREE_RESERVED */
191
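/*
 * A rough worked example of the tuning macros above, with purely
 * illustrative page counts and the default VM_PAGE_LAUNDRY_MAX of 16:
 *
 *	VM_PAGE_INACTIVE_TARGET(30000)	= 30000 / 3        = 10000 pages
 *	VM_PAGE_FREE_TARGET(8000)	= 15 + 8000 / 80   = 115 pages
 *	VM_PAGE_FREE_MIN(8000)		= 10 + 8000 / 100  = 90 pages
 *	VM_PAGE_FREE_RESERVED(n)	= 6 * 16 + n       = 96 + n pages
 *
 * The real arguments are supplied at run time (see vm_page_free_reserve()
 * below); the numbers here only show the shape of the curves.
 */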
192
193 /*
194 * must hold the page queues lock to
195 * manipulate this structure
196 */
197 struct vm_pageout_queue {
198 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
199 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
200 unsigned int pgo_maxlaundry;
201
202 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
203 pgo_busy:1, /* iothread is currently processing request from pgo_pending */
204 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
205 :0;
206 };
207
208 #define VM_PAGE_Q_THROTTLED(q) \
209 ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
210
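/*
 * A minimal sketch of the throttle handshake between vm_pageout_scan()
 * and the pager iothreads, using only the fields defined above (the real
 * code is in vm_pageout_scan() and vm_pageout_throttle_up()):
 *
 *	scan side, page queues locked:
 *		if (VM_PAGE_Q_THROTTLED(q)) {
 *			q->pgo_throttled = TRUE;
 *			... block waiting on &q->pgo_laundry ...
 *		}
 *
 *	completion side, when a laundry page comes back:
 *		q->pgo_laundry--;
 *		if (q->pgo_throttled == TRUE) {
 *			q->pgo_throttled = FALSE;
 *			thread_wakeup((event_t) &q->pgo_laundry);
 *		}
 */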
211
212 /*
213 * Exported variable used to broadcast the activation of the pageout scan.
214 * Working Set uses this to throttle its use of pmap removes. In this
215 * way, code which runs within memory in an uncontested context does
216 * not keep encountering soft faults.
217 */
218
219 unsigned int vm_pageout_scan_event_counter = 0;
220
221 /*
222 * Forward declarations for internal routines.
223 */
224
225 static void vm_pageout_garbage_collect(int);
226 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
227 static void vm_pageout_iothread_external(void);
228 static void vm_pageout_iothread_internal(void);
229 static void vm_pageout_queue_steal(vm_page_t);
230
231 extern void vm_pageout_continue(void);
232 extern void vm_pageout_scan(void);
233
234 unsigned int vm_pageout_reserved_internal = 0;
235 unsigned int vm_pageout_reserved_really = 0;
236
237 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
238 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
239 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
240 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
241 unsigned int vm_pageout_deadlock_relief = 0;
242 unsigned int vm_pageout_inactive_relief = 0;
243 unsigned int vm_pageout_burst_active_throttle = 0;
244 unsigned int vm_pageout_burst_inactive_throttle = 0;
245
246 /*
247 * Protection against zero fill flushing live working sets derived
248 * from existing backing store and files
249 */
250 unsigned int vm_accellerate_zf_pageout_trigger = 400;
251 unsigned int vm_zf_iterator;
252 unsigned int vm_zf_iterator_count = 40;
253 unsigned int last_page_zf;
254 unsigned int vm_zf_count = 0;
255
256 /*
257 * These variables record the pageout daemon's actions:
258 * how many pages it looks at and what happens to those pages.
259 * No locking needed because only one thread modifies the variables.
260 */
261
262 unsigned int vm_pageout_active = 0; /* debugging */
263 unsigned int vm_pageout_inactive = 0; /* debugging */
264 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
265 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
266 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
267 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
268 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
269 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
270 unsigned int vm_pageout_inactive_used = 0; /* debugging */
271 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
272 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
273 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
274 unsigned int vm_pageout_purged_objects = 0; /* debugging */
275 unsigned int vm_stat_discard = 0; /* debugging */
276 unsigned int vm_stat_discard_sent = 0; /* debugging */
277 unsigned int vm_stat_discard_failure = 0; /* debugging */
278 unsigned int vm_stat_discard_throttle = 0; /* debugging */
279
280 unsigned int vm_pageout_scan_active_throttled = 0;
281 unsigned int vm_pageout_scan_inactive_throttled = 0;
282 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
283 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
284 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
285 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
286 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
287 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
288 /*
289 * Backing store throttle when BS is exhausted
290 */
291 unsigned int vm_backing_store_low = 0;
292
293 unsigned int vm_pageout_out_of_line = 0;
294 unsigned int vm_pageout_in_place = 0;
295
296 /*
297 * ENCRYPTED SWAP:
298 * counters and statistics...
299 */
300 unsigned long vm_page_decrypt_counter = 0;
301 unsigned long vm_page_decrypt_for_upl_counter = 0;
302 unsigned long vm_page_encrypt_counter = 0;
303 unsigned long vm_page_encrypt_abort_counter = 0;
304 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
305 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
306
307
308 struct vm_pageout_queue vm_pageout_queue_internal;
309 struct vm_pageout_queue vm_pageout_queue_external;
310
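/*
 * A minimal sketch, as a hypothetical helper introduced here only for
 * illustration, of the queue-selection rule that vm_pageout_cluster(),
 * vm_pageout_throttle_up() and vm_pageout_queue_steal() all follow:
 * pages of internal (anonymous) objects are laundered through the
 * default pager's queue, everything else goes to the external queue.
 */
static __inline__ struct vm_pageout_queue *
vm_pageout_queue_for_object(vm_object_t object)
{
	return (object->internal == TRUE) ? &vm_pageout_queue_internal
					  : &vm_pageout_queue_external;
}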
311
312 /*
313 * Routine: vm_backing_store_disable
314 * Purpose:
315 * Suspend non-privileged threads wishing to extend
316 * backing store when we are low on backing store
317 * (Synchronized by caller)
318 */
319 void
320 vm_backing_store_disable(
321 boolean_t disable)
322 {
323 if(disable) {
324 vm_backing_store_low = 1;
325 } else {
326 if(vm_backing_store_low) {
327 vm_backing_store_low = 0;
328 thread_wakeup((event_t) &vm_backing_store_low);
329 }
330 }
331 }
332
333
334 /*
335 * Routine: vm_pageout_object_allocate
336 * Purpose:
337 * Allocate an object for use as out-of-line memory in a
338 * data_return/data_initialize message.
339 * The page must be in an unlocked object.
340 *
341 * If the page belongs to a trusted pager, cleaning in place
342 * will be used, which utilizes a special "pageout object"
343 * containing private alias pages for the real page frames.
344 * Untrusted pagers use normal out-of-line memory.
345 */
346 vm_object_t
347 vm_pageout_object_allocate(
348 vm_page_t m,
349 vm_size_t size,
350 vm_object_offset_t offset)
351 {
352 vm_object_t object = m->object;
353 vm_object_t new_object;
354
355 assert(object->pager_ready);
356
357 new_object = vm_object_allocate(size);
358
359 if (object->pager_trusted) {
360 assert (offset < object->size);
361
362 vm_object_lock(new_object);
363 new_object->pageout = TRUE;
364 new_object->shadow = object;
365 new_object->can_persist = FALSE;
366 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
367 new_object->shadow_offset = offset;
368 vm_object_unlock(new_object);
369
370 /*
371 * Take a paging reference on the object. This will be dropped
372 * in vm_pageout_object_terminate()
373 */
374 vm_object_lock(object);
375 vm_object_paging_begin(object);
376 vm_page_lock_queues();
377 vm_page_unlock_queues();
378 vm_object_unlock(object);
379
380 vm_pageout_in_place++;
381 } else
382 vm_pageout_out_of_line++;
383 return(new_object);
384 }
385
386 #if MACH_CLUSTER_STATS
387 unsigned long vm_pageout_cluster_dirtied = 0;
388 unsigned long vm_pageout_cluster_cleaned = 0;
389 unsigned long vm_pageout_cluster_collisions = 0;
390 unsigned long vm_pageout_cluster_clusters = 0;
391 unsigned long vm_pageout_cluster_conversions = 0;
392 unsigned long vm_pageout_target_collisions = 0;
393 unsigned long vm_pageout_target_page_dirtied = 0;
394 unsigned long vm_pageout_target_page_freed = 0;
395 #define CLUSTER_STAT(clause) clause
396 #else /* MACH_CLUSTER_STATS */
397 #define CLUSTER_STAT(clause)
398 #endif /* MACH_CLUSTER_STATS */
399
400 /*
401 * Routine: vm_pageout_object_terminate
402 * Purpose:
403 * Destroy the pageout_object allocated by
404 * vm_pageout_object_allocate(), and perform all of the
405 * required cleanup actions.
406 *
407 * In/Out conditions:
408 * The object must be locked, and will be returned locked.
409 */
410 void
411 vm_pageout_object_terminate(
412 vm_object_t object)
413 {
414 vm_object_t shadow_object;
415 boolean_t shadow_internal;
416
417 /*
418 * Deal with the deallocation (last reference) of a pageout object
419 * (used for cleaning-in-place) by dropping the paging references/
420 * freeing pages in the original object.
421 */
422
423 assert(object->pageout);
424 shadow_object = object->shadow;
425 vm_object_lock(shadow_object);
426 shadow_internal = shadow_object->internal;
427
428 while (!queue_empty(&object->memq)) {
429 vm_page_t p, m;
430 vm_object_offset_t offset;
431
432 p = (vm_page_t) queue_first(&object->memq);
433
434 assert(p->private);
435 assert(p->pageout);
436 p->pageout = FALSE;
437 assert(!p->cleaning);
438
439 offset = p->offset;
440 VM_PAGE_FREE(p);
441 p = VM_PAGE_NULL;
442
443 m = vm_page_lookup(shadow_object,
444 offset + object->shadow_offset);
445
446 if(m == VM_PAGE_NULL)
447 continue;
448 assert(m->cleaning);
449 /* used as a trigger on upl_commit etc to recognize the */
450 /* pageout daemon's subsequent desire to pageout a cleaning */
451 /* page. When the bit is on the upl commit code will */
452 /* respect the pageout bit in the target page over the */
453 /* caller's page list indication */
454 m->dump_cleaning = FALSE;
455
456 /*
457 * Account for the paging reference taken when
458 * m->cleaning was set on this page.
459 */
460 vm_object_paging_end(shadow_object);
461 assert((m->dirty) || (m->precious) ||
462 (m->busy && m->cleaning));
463
464 /*
465 * Handle the trusted pager throttle.
466 * Also decrement the burst throttle (if external).
467 */
468 vm_page_lock_queues();
469 if (m->laundry) {
470 vm_pageout_throttle_up(m);
471 }
472
473 /*
474 * Handle the "target" page(s). These pages are to be freed if
475 * successfully cleaned. Target pages are always busy, and are
476 * wired exactly once. The initial target pages are not mapped,
477 * (so cannot be referenced or modified) but converted target
478 * pages may have been modified between the selection as an
479 * adjacent page and conversion to a target.
480 */
481 if (m->pageout) {
482 assert(m->busy);
483 assert(m->wire_count == 1);
484 m->cleaning = FALSE;
485 m->pageout = FALSE;
486 #if MACH_CLUSTER_STATS
487 if (m->wanted) vm_pageout_target_collisions++;
488 #endif
489 /*
490 * Revoke all access to the page. Since the object is
491 * locked, and the page is busy, this prevents the page
492 * from being dirtied after the pmap_disconnect() call
493 * returns.
494 *
495 * Since the page is left "dirty" but "not modified", we
496 * can detect whether the page was redirtied during
497 * pageout by checking the modify state.
498 */
499 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
500 m->dirty = TRUE;
501 else
502 m->dirty = FALSE;
503
504 if (m->dirty) {
505 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
506 vm_page_unwire(m);/* reactivates */
507 VM_STAT(reactivations++);
508 PAGE_WAKEUP_DONE(m);
509 } else {
510 CLUSTER_STAT(vm_pageout_target_page_freed++;)
511 vm_page_free(m);/* clears busy, etc. */
512 }
513 vm_page_unlock_queues();
514 continue;
515 }
516 /*
517 * Handle the "adjacent" pages. These pages were cleaned in
518 * place, and should be left alone.
519 * If the page is on neither the active nor the inactive queue,
520 * reactivate it if it was referenced, otherwise deactivate it.
521 */
522 if (!m->active && !m->inactive && !m->private) {
523 if (m->reference)
524 vm_page_activate(m);
525 else
526 vm_page_deactivate(m);
527 }
528 if((m->busy) && (m->cleaning)) {
529
530 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
531 m->busy = FALSE;
532
533 /* We do not re-set m->dirty ! */
534 /* The page was busy so no extraneous activity */
535 /* could have occurred. COPY_INTO is a read into the */
536 /* new pages. CLEAN_IN_PLACE does actually write */
537 /* out the pages but handling outside of this code */
538 /* will take care of resetting dirty. We clear the */
539 /* modify bit, however, for the Programmed I/O case. */
540 pmap_clear_modify(m->phys_page);
541 if(m->absent) {
542 m->absent = FALSE;
543 if(shadow_object->absent_count == 1)
544 vm_object_absent_release(shadow_object);
545 else
546 shadow_object->absent_count--;
547 }
548 m->overwriting = FALSE;
549 } else if (m->overwriting) {
550 /* alternate request page list, write to page_list */
551 /* case. Occurs when the original page was wired */
552 /* at the time of the list request */
553 assert(m->wire_count != 0);
554 vm_page_unwire(m);/* reactivates */
555 m->overwriting = FALSE;
556 } else {
557 /*
558 * Set the dirty state according to whether or not the page was
559 * modified during the pageout. Note that we purposefully do
560 * NOT call pmap_clear_modify since the page is still mapped.
561 * If the page were to be dirtied between the 2 calls,
562 * this fact would be lost. This code is only necessary to
563 * maintain statistics, since the pmap module is always
564 * consulted if m->dirty is false.
565 */
566 #if MACH_CLUSTER_STATS
567 m->dirty = pmap_is_modified(m->phys_page);
568
569 if (m->dirty) vm_pageout_cluster_dirtied++;
570 else vm_pageout_cluster_cleaned++;
571 if (m->wanted) vm_pageout_cluster_collisions++;
572 #else
573 m->dirty = 0;
574 #endif
575 }
576 m->cleaning = FALSE;
577
578 /*
579 * Wakeup any thread waiting for the page to be un-cleaning.
580 */
581 PAGE_WAKEUP(m);
582 vm_page_unlock_queues();
583 }
584 /*
585 * Account for the paging reference taken in vm_pageout_object_allocate().
586 */
587 vm_object_paging_end(shadow_object);
588 vm_object_unlock(shadow_object);
589
590 assert(object->ref_count == 0);
591 assert(object->paging_in_progress == 0);
592 assert(object->resident_page_count == 0);
593 return;
594 }
595
596 /*
597 * Routine: vm_pageout_setup
598 * Purpose:
599 * Set up a page for pageout (clean & flush).
600 *
601 * Move the page to a new object, as part of which it will be
602 * sent to its memory manager in a memory_object_data_write or
603 * memory_object_initialize message.
604 *
605 * The "new_object" and "new_offset" arguments
606 * indicate where the page should be moved.
607 *
608 * In/Out conditions:
609 * The page in question must not be on any pageout queues,
610 * and must be busy. The object to which it belongs
611 * must be unlocked, and the caller must hold a paging
612 * reference to it. The new_object must not be locked.
613 *
614 * This routine returns a pointer to a place-holder page,
615 * inserted at the same offset, to block out-of-order
616 * requests for the page. The place-holder page must
617 * be freed after the data_write or initialize message
618 * has been sent.
619 *
620 * The original page is put on a paging queue and marked
621 * not busy on exit.
622 */
623 vm_page_t
624 vm_pageout_setup(
625 register vm_page_t m,
626 register vm_object_t new_object,
627 vm_object_offset_t new_offset)
628 {
629 register vm_object_t old_object = m->object;
630 vm_object_offset_t paging_offset;
631 vm_object_offset_t offset;
632 register vm_page_t holding_page;
633 register vm_page_t new_m;
634 boolean_t need_to_wire = FALSE;
635
636
637 XPR(XPR_VM_PAGEOUT,
638 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
639 (integer_t)m->object, (integer_t)m->offset,
640 (integer_t)m, (integer_t)new_object,
641 (integer_t)new_offset);
642 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
643 !m->restart);
644
645 assert(m->dirty || m->precious);
646
647 /*
648 * Create a place-holder page where the old one was, to prevent
649 * attempted pageins of this page while we're unlocked.
650 */
651 VM_PAGE_GRAB_FICTITIOUS(holding_page);
652
653 vm_object_lock(old_object);
654
655 offset = m->offset;
656 paging_offset = offset + old_object->paging_offset;
657
658 if (old_object->pager_trusted) {
659 /*
660 * This pager is trusted, so we can clean this page
661 * in place. Leave it in the old object, and mark it
662 * cleaning & pageout.
663 */
664 new_m = holding_page;
665 holding_page = VM_PAGE_NULL;
666
667 /*
668 * Set up new page to be private shadow of real page.
669 */
670 new_m->phys_page = m->phys_page;
671 new_m->fictitious = FALSE;
672 new_m->pageout = TRUE;
673
674 /*
675 * Mark real page as cleaning (indicating that we hold a
676 * paging reference to be released via m_o_d_r_c) and
677 * pageout (indicating that the page should be freed
678 * when the pageout completes).
679 */
680 pmap_clear_modify(m->phys_page);
681 vm_page_lock_queues();
682 new_m->private = TRUE;
683 vm_page_wire(new_m);
684 m->cleaning = TRUE;
685 m->pageout = TRUE;
686
687 vm_page_wire(m);
688 assert(m->wire_count == 1);
689 vm_page_unlock_queues();
690
691 m->dirty = TRUE;
692 m->precious = FALSE;
693 m->page_lock = VM_PROT_NONE;
694 m->unusual = FALSE;
695 m->unlock_request = VM_PROT_NONE;
696 } else {
697 /*
698 * Cannot clean in place, so rip the old page out of the
699 * object, and stick the holding page in. Set new_m to the
700 * page in the new object.
701 */
702 vm_page_lock_queues();
703 VM_PAGE_QUEUES_REMOVE(m);
704 vm_page_remove(m);
705
706 vm_page_insert(holding_page, old_object, offset);
707 vm_page_unlock_queues();
708
709 m->dirty = TRUE;
710 m->precious = FALSE;
711 new_m = m;
712 new_m->page_lock = VM_PROT_NONE;
713 new_m->unlock_request = VM_PROT_NONE;
714
715 if (old_object->internal)
716 need_to_wire = TRUE;
717 }
718 /*
719 * Record that this page has been written out
720 */
721 #if MACH_PAGEMAP
722 vm_external_state_set(old_object->existence_map, offset);
723 #endif /* MACH_PAGEMAP */
724
725 vm_object_unlock(old_object);
726
727 vm_object_lock(new_object);
728
729 /*
730 * Put the page into the new object. If it is not wired
731 * (i.e. if it's the real page) it will be activated.
732 */
733
734 vm_page_lock_queues();
735 vm_page_insert(new_m, new_object, new_offset);
736 if (need_to_wire)
737 vm_page_wire(new_m);
738 else
739 vm_page_activate(new_m);
740 PAGE_WAKEUP_DONE(new_m);
741 vm_page_unlock_queues();
742
743 vm_object_unlock(new_object);
744
745 /*
746 * Return the placeholder page to simplify cleanup.
747 */
748 return (holding_page);
749 }
750
751 /*
752 * Routine: vm_pageclean_setup
753 *
754 * Purpose: setup a page to be cleaned (made non-dirty), but not
755 * necessarily flushed from the VM page cache.
756 * This is accomplished by cleaning in place.
757 *
758 * The page must not be busy, and the object and page
759 * queues must be locked.
760 *
761 */
762 void
763 vm_pageclean_setup(
764 vm_page_t m,
765 vm_page_t new_m,
766 vm_object_t new_object,
767 vm_object_offset_t new_offset)
768 {
769 vm_object_t old_object = m->object;
770 assert(!m->busy);
771 assert(!m->cleaning);
772
773 XPR(XPR_VM_PAGEOUT,
774 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
775 (integer_t)old_object, m->offset, (integer_t)m,
776 (integer_t)new_m, new_offset);
777
778 pmap_clear_modify(m->phys_page);
779 vm_object_paging_begin(old_object);
780
781 /*
782 * Record that this page has been written out
783 */
784 #if MACH_PAGEMAP
785 vm_external_state_set(old_object->existence_map, m->offset);
786 #endif /*MACH_PAGEMAP*/
787
788 /*
789 * Mark original page as cleaning in place.
790 */
791 m->cleaning = TRUE;
792 m->dirty = TRUE;
793 m->precious = FALSE;
794
795 /*
796 * Convert the fictitious page to a private shadow of
797 * the real page.
798 */
799 assert(new_m->fictitious);
800 new_m->fictitious = FALSE;
801 new_m->private = TRUE;
802 new_m->pageout = TRUE;
803 new_m->phys_page = m->phys_page;
804 vm_page_wire(new_m);
805
806 vm_page_insert(new_m, new_object, new_offset);
807 assert(!new_m->wanted);
808 new_m->busy = FALSE;
809 }
810
811 void
812 vm_pageclean_copy(
813 vm_page_t m,
814 vm_page_t new_m,
815 vm_object_t new_object,
816 vm_object_offset_t new_offset)
817 {
818 XPR(XPR_VM_PAGEOUT,
819 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
820 m, new_m, new_object, new_offset, 0);
821
822 assert((!m->busy) && (!m->cleaning));
823
824 assert(!new_m->private && !new_m->fictitious);
825
826 pmap_clear_modify(m->phys_page);
827
828 m->busy = TRUE;
829 vm_object_paging_begin(m->object);
830 vm_page_unlock_queues();
831 vm_object_unlock(m->object);
832
833 /*
834 * Copy the original page to the new page.
835 */
836 vm_page_copy(m, new_m);
837
838 /*
839 * Mark the old page as clean. A request to pmap_is_modified
840 * will get the right answer.
841 */
842 vm_object_lock(m->object);
843 m->dirty = FALSE;
844
845 vm_object_paging_end(m->object);
846
847 vm_page_lock_queues();
848 if (!m->active && !m->inactive)
849 vm_page_activate(m);
850 PAGE_WAKEUP_DONE(m);
851
852 vm_page_insert(new_m, new_object, new_offset);
853 vm_page_activate(new_m);
854 new_m->busy = FALSE; /* No other thread can be waiting */
855 }
856
857
858 /*
859 * Routine: vm_pageout_initialize_page
860 * Purpose:
861 * Causes the specified page to be initialized in
862 * the appropriate memory object. This routine is used to push
863 * pages into a copy-object when they are modified in the
864 * permanent object.
865 *
866 * The page is moved to a temporary object and paged out.
867 *
868 * In/out conditions:
869 * The page in question must not be on any pageout queues.
870 * The object to which it belongs must be locked.
871 * The page must be busy, but not hold a paging reference.
872 *
873 * Implementation:
874 * Move this page to a completely new object.
875 */
876 void
877 vm_pageout_initialize_page(
878 vm_page_t m)
879 {
880 vm_object_t object;
881 vm_object_offset_t paging_offset;
882 vm_page_t holding_page;
883
884
885 XPR(XPR_VM_PAGEOUT,
886 "vm_pageout_initialize_page, page 0x%X\n",
887 (integer_t)m, 0, 0, 0, 0);
888 assert(m->busy);
889
890 /*
891 * Verify that we really want to clean this page
892 */
893 assert(!m->absent);
894 assert(!m->error);
895 assert(m->dirty);
896
897 /*
898 * Create a paging reference to let us play with the object.
899 */
900 object = m->object;
901 paging_offset = m->offset + object->paging_offset;
902 vm_object_paging_begin(object);
903 if (m->absent || m->error || m->restart ||
904 (!m->dirty && !m->precious)) {
905 VM_PAGE_FREE(m);
906 panic("reservation without pageout?"); /* alan */
907 vm_object_unlock(object);
908 return;
909 }
910
911 /* set the page for future call to vm_fault_list_request */
912 holding_page = NULL;
913 vm_page_lock_queues();
914 pmap_clear_modify(m->phys_page);
915 m->dirty = TRUE;
916 m->busy = TRUE;
917 m->list_req_pending = TRUE;
918 m->cleaning = TRUE;
919 m->pageout = TRUE;
920 vm_page_wire(m);
921 vm_page_unlock_queues();
922 vm_object_unlock(object);
923
924 /*
925 * Write the data to its pager.
926 * Note that the data is passed by naming the new object,
927 * not a virtual address; the pager interface has been
928 * manipulated to use the "internal memory" data type.
929 * [The object reference from its allocation is donated
930 * to the eventual recipient.]
931 */
932 memory_object_data_initialize(object->pager,
933 paging_offset,
934 PAGE_SIZE);
935
936 vm_object_lock(object);
937 }
938
939 #if MACH_CLUSTER_STATS
940 #define MAXCLUSTERPAGES 16
941 struct {
942 unsigned long pages_in_cluster;
943 unsigned long pages_at_higher_offsets;
944 unsigned long pages_at_lower_offsets;
945 } cluster_stats[MAXCLUSTERPAGES];
946 #endif /* MACH_CLUSTER_STATS */
947
948 boolean_t allow_clustered_pageouts = FALSE;
949
950 /*
951 * vm_pageout_cluster:
952 *
953 * Given a page, queue it to the appropriate I/O thread,
954 * which will page it out and attempt to clean adjacent pages
955 * in the same operation.
956 *
957 * The page must be busy, and the object and queues locked. We will take a
958 * paging reference to prevent deallocation or collapse when we
959 * release the object lock back at the call site. The I/O thread
960 * is responsible for consuming this reference
961 *
962 * The page must not be on any pageout queue.
963 */
964
965 void
966 vm_pageout_cluster(vm_page_t m)
967 {
968 vm_object_t object = m->object;
969 struct vm_pageout_queue *q;
970
971
972 XPR(XPR_VM_PAGEOUT,
973 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
974 (integer_t)object, m->offset, (integer_t)m, 0, 0);
975
976 /*
977 * Only a certain kind of page is appreciated here.
978 */
979 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
980 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
981
982 /*
983 * protect the object from collapse -
984 * locking in the object's paging_offset.
985 */
986 vm_object_paging_begin(object);
987
988 /*
989 * set the page for future call to vm_fault_list_request
990 * page should already be marked busy
991 */
992 vm_page_wire(m);
993 m->list_req_pending = TRUE;
994 m->cleaning = TRUE;
995 m->pageout = TRUE;
996 m->laundry = TRUE;
997
998 if (object->internal == TRUE)
999 q = &vm_pageout_queue_internal;
1000 else
1001 q = &vm_pageout_queue_external;
1002 q->pgo_laundry++;
1003
1004 m->pageout_queue = TRUE;
1005 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
1006
1007 if (q->pgo_idle == TRUE) {
1008 q->pgo_idle = FALSE;
1009 thread_wakeup((event_t) &q->pgo_pending);
1010 }
1011 }
1012
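/*
 * A brief summary of the laundering path that starts here:
 * vm_pageout_cluster() marks the page (list_req_pending, cleaning,
 * pageout, laundry, pageout_queue, wired), bumps the queue's pgo_laundry
 * count and enqueues the page on pgo_pending.  The matching iothread
 * dequeues it in vm_pageout_iothread_continue() and hands it to
 * memory_object_data_return(); when the pageout completes, the laundry
 * count is dropped again via vm_pageout_throttle_up().
 */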
1013
1014 unsigned long vm_pageout_throttle_up_count = 0;
1015
1016 /*
1017 * A page is back from laundry. See if there are some pages waiting to
1018 * go to laundry and if we can let some of them go now.
1019 *
1020 * Object and page queues must be locked.
1021 */
1022 void
1023 vm_pageout_throttle_up(
1024 vm_page_t m)
1025 {
1026 struct vm_pageout_queue *q;
1027
1028 vm_pageout_throttle_up_count++;
1029
1030 assert(m->laundry);
1031 assert(m->object != VM_OBJECT_NULL);
1032 assert(m->object != kernel_object);
1033
1034 if (m->object->internal == TRUE)
1035 q = &vm_pageout_queue_internal;
1036 else
1037 q = &vm_pageout_queue_external;
1038
1039 m->laundry = FALSE;
1040 q->pgo_laundry--;
1041
1042 if (q->pgo_throttled == TRUE) {
1043 q->pgo_throttled = FALSE;
1044 thread_wakeup((event_t) &q->pgo_laundry);
1045 }
1046 }
1047
1048
1049 /*
1050 * vm_pageout_scan does the dirty work for the pageout daemon.
1051 * It returns with vm_page_queue_free_lock held and
1052 * vm_page_free_wanted == 0.
1053 */
1054
1055 #define DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
1056
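/*
 * A brief note on the batching below: vm_pageout_scan() holds the page
 * queues lock across up to DELAYED_UNLOCK_LIMIT pages, then drops it,
 * frees anything gathered on local_freeq and calls mutex_pause() so
 * other threads get a chance at the queues before the scan resumes.
 */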
1057 #define FCS_IDLE 0
1058 #define FCS_DELAYED 1
1059 #define FCS_DEADLOCK_DETECTED 2
1060
1061 struct flow_control {
1062 int state;
1063 mach_timespec_t ts;
1064 };
1065
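/*
 * A short sketch of how vm_pageout_scan() below drives these states while
 * the default pager's queue stays throttled:
 *
 *	FCS_IDLE		queue just became throttled: arm a timer for
 *				now + vm_pageout_deadlock_wait and move to
 *				FCS_DELAYED.
 *	FCS_DELAYED		still throttled: if the timer has expired,
 *				assume a deadlock, set vm_pageout_deadlock_target
 *				and move to FCS_DEADLOCK_DETECTED; otherwise
 *				wait vm_pageout_idle_wait and re-check.
 *	FCS_DEADLOCK_DETECTED	steal clean or external pages until the target
 *				drains, then re-arm the timer (back to
 *				FCS_DELAYED).
 *
 * Any pass on which the internal queue is no longer throttled drops the
 * state back to FCS_IDLE.
 */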
1066 extern kern_return_t sysclk_gettime(mach_timespec_t *);
1067
1068
1069 void
1070 vm_pageout_scan(void)
1071 {
1072 unsigned int loop_count = 0;
1073 unsigned int inactive_burst_count = 0;
1074 unsigned int active_burst_count = 0;
1075 vm_page_t local_freeq = 0;
1076 int local_freed = 0;
1077 int delayed_unlock = 0;
1078 int need_internal_inactive = 0;
1079 int refmod_state = 0;
1080 int vm_pageout_deadlock_target = 0;
1081 struct vm_pageout_queue *iq;
1082 struct vm_pageout_queue *eq;
1083 struct flow_control flow_control;
1084 boolean_t active_throttled = FALSE;
1085 boolean_t inactive_throttled = FALSE;
1086 mach_timespec_t ts;
1087 unsigned int msecs = 0;
1088 vm_object_t object;
1089
1090
1091 flow_control.state = FCS_IDLE;
1092 iq = &vm_pageout_queue_internal;
1093 eq = &vm_pageout_queue_external;
1094
1095 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1096
1097 /*???*/ /*
1098 * We want to gradually dribble pages from the active queue
1099 * to the inactive queue. If we let the inactive queue get
1100 * very small, and then suddenly dump many pages into it,
1101 * those pages won't get a sufficient chance to be referenced
1102 * before we start taking them from the inactive queue.
1103 *
1104 * We must limit the rate at which we send pages to the pagers.
1105 * data_write messages consume memory, for message buffers and
1106 * for map-copy objects. If we get too far ahead of the pagers,
1107 * we can potentially run out of memory.
1108 *
1109 * We can use the laundry count to limit directly the number
1110 * of pages outstanding to the default pager. A similar
1111 * strategy for external pagers doesn't work, because
1112 * external pagers don't have to deallocate the pages sent them,
1113 * and because we might have to send pages to external pagers
1114 * even if they aren't processing writes. So we also
1115 * use a burst count to limit writes to external pagers.
1116 *
1117 * When memory is very tight, we can't rely on external pagers to
1118 * clean pages. They probably aren't running, because they
1119 * aren't vm-privileged. If we kept sending dirty pages to them,
1120 * we could exhaust the free list.
1121 */
1122 vm_page_lock_queues();
1123 delayed_unlock = 1;
1124
1125
1126 Restart:
1127 /*
1128 * Recalculate vm_page_inactive_target.
1129 */
1130 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1131 vm_page_inactive_count);
1132 object = NULL;
1133
1134 for (;;) {
1135 vm_page_t m;
1136
1137 if (delayed_unlock == 0)
1138 vm_page_lock_queues();
1139
1140 active_burst_count = vm_page_active_count;
1141
1142 if (active_burst_count > vm_pageout_burst_active_throttle)
1143 active_burst_count = vm_pageout_burst_active_throttle;
1144
1145 /*
1146 * Move pages from active to inactive.
1147 */
1148 while ((need_internal_inactive ||
1149 vm_page_inactive_count < vm_page_inactive_target) &&
1150 !queue_empty(&vm_page_queue_active) &&
1151 ((active_burst_count--) > 0)) {
1152
1153 vm_pageout_active++;
1154
1155 m = (vm_page_t) queue_first(&vm_page_queue_active);
1156
1157 assert(m->active && !m->inactive);
1158 assert(!m->laundry);
1159 assert(m->object != kernel_object);
1160
1161 /*
1162 * Try to lock object; since we've already got the
1163 * page queues lock, we can only 'try' for this one.
1164 * if the 'try' fails, we need to do a mutex_pause
1165 * to allow the owner of the object lock a chance to
1166 * run... otherwise, we're likely to trip over this
1167 * object in the same state as we work our way through
1168 * the queue... clumps of pages associated with the same
1169 * object are fairly typical on the inactive and active queues
1170 */
1171 if (m->object != object) {
1172 if (object != NULL) {
1173 vm_object_unlock(object);
1174 object = NULL;
1175 }
1176 if (!vm_object_lock_try(m->object)) {
1177 /*
1178 * move page to end of active queue and continue
1179 */
1180 queue_remove(&vm_page_queue_active, m,
1181 vm_page_t, pageq);
1182 queue_enter(&vm_page_queue_active, m,
1183 vm_page_t, pageq);
1184
1185 goto done_with_activepage;
1186 }
1187 object = m->object;
1188 }
1189 /*
1190 * if the page is BUSY, then we pull it
1191 * off the active queue and leave it alone.
1192 * when BUSY is cleared, it will get stuck
1193 * back on the appropriate queue
1194 */
1195 if (m->busy) {
1196 queue_remove(&vm_page_queue_active, m,
1197 vm_page_t, pageq);
1198 m->pageq.next = NULL;
1199 m->pageq.prev = NULL;
1200
1201 if (!m->fictitious)
1202 vm_page_active_count--;
1203 m->active = FALSE;
1204
1205 goto done_with_activepage;
1206 }
1207 if (need_internal_inactive) {
1208 /*
1209 * If we're unable to make forward progress
1210 * with the current set of pages on the
1211 * inactive queue due to busy objects or
1212 * throttled pageout queues, then
1213 * move a page that is already clean
1214 * or belongs to a pageout queue that
1215 * isn't currently throttled
1216 */
1217 active_throttled = FALSE;
1218
1219 if (object->internal) {
1220 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1221 active_throttled = TRUE;
1222 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1223 active_throttled = TRUE;
1224 }
1225 if (active_throttled == TRUE) {
1226 if (!m->dirty) {
1227 refmod_state = pmap_get_refmod(m->phys_page);
1228
1229 if (refmod_state & VM_MEM_REFERENCED)
1230 m->reference = TRUE;
1231 if (refmod_state & VM_MEM_MODIFIED)
1232 m->dirty = TRUE;
1233 }
1234 if (m->dirty || m->precious) {
1235 /*
1236 * page is dirty and targets a THROTTLED queue
1237 * so all we can do is move it back to the
1238 * end of the active queue to get it out
1239 * of the way
1240 */
1241 queue_remove(&vm_page_queue_active, m,
1242 vm_page_t, pageq);
1243 queue_enter(&vm_page_queue_active, m,
1244 vm_page_t, pageq);
1245
1246 vm_pageout_scan_active_throttled++;
1247
1248 goto done_with_activepage;
1249 }
1250 }
1251 vm_pageout_scan_active_throttle_success++;
1252 need_internal_inactive--;
1253 }
1254 /*
1255 * Deactivate the page while holding the object
1256 * locked, so we know the page is still not busy.
1257 * This should prevent races between pmap_enter
1258 * and pmap_clear_reference. The page might be
1259 * absent or fictitious, but vm_page_deactivate
1260 * can handle that.
1261 */
1262 vm_page_deactivate(m);
1263 done_with_activepage:
1264 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1265
1266 if (object != NULL) {
1267 vm_object_unlock(object);
1268 object = NULL;
1269 }
1270 if (local_freeq) {
1271 vm_page_free_list(local_freeq);
1272
1273 local_freeq = 0;
1274 local_freed = 0;
1275 }
1276 delayed_unlock = 0;
1277 vm_page_unlock_queues();
1278
1279 mutex_pause();
1280 vm_page_lock_queues();
1281 /*
1282 * continue the while loop processing
1283 * the active queue... need to hold
1284 * the page queues lock
1285 */
1286 continue;
1287 }
1288 }
1289
1290
1291
1292 /**********************************************************************
1293 * above this point we're playing with the active queue
1294 * below this point we're playing with the throttling mechanisms
1295 * and the inactive queue
1296 **********************************************************************/
1297
1298
1299
1300 /*
1301 * We are done if we have met our target *and*
1302 * nobody is still waiting for a page.
1303 */
1304 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1305 if (object != NULL) {
1306 vm_object_unlock(object);
1307 object = NULL;
1308 }
1309 if (local_freeq) {
1310 vm_page_free_list(local_freeq);
1311
1312 local_freeq = 0;
1313 local_freed = 0;
1314 }
1315 mutex_lock(&vm_page_queue_free_lock);
1316
1317 if ((vm_page_free_count >= vm_page_free_target) &&
1318 (vm_page_free_wanted == 0)) {
1319
1320 vm_page_unlock_queues();
1321
1322 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1323 return;
1324 }
1325 mutex_unlock(&vm_page_queue_free_lock);
1326 }
1327
1328
1329 /*
1330 * Sometimes we have to pause:
1331 * 1) No inactive pages - nothing to do.
1332 * 2) Flow control - default pageout queue is full
1333 * 3) Loop control - no acceptable pages found on the inactive queue
1334 * within the last vm_pageout_burst_inactive_throttle iterations
1335 */
1336 if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
1337 vm_pageout_scan_empty_throttle++;
1338 msecs = vm_pageout_empty_wait;
1339 goto vm_pageout_scan_delay;
1340
1341 } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
1342 vm_pageout_scan_burst_throttle++;
1343 msecs = vm_pageout_burst_wait;
1344 goto vm_pageout_scan_delay;
1345
1346 } else if (VM_PAGE_Q_THROTTLED(iq)) {
1347
1348 switch (flow_control.state) {
1349
1350 case FCS_IDLE:
1351 reset_deadlock_timer:
1352 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1353 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1354 sysclk_gettime(&flow_control.ts);
1355 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1356
1357 flow_control.state = FCS_DELAYED;
1358 msecs = vm_pageout_deadlock_wait;
1359
1360 break;
1361
1362 case FCS_DELAYED:
1363 sysclk_gettime(&ts);
1364
1365 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1366 /*
1367 * the pageout thread for the default pager is potentially
1368 * deadlocked since the
1369 * default pager queue has been throttled for more than the
1370 * allowable time... we need to move some clean pages or dirty
1371 * pages belonging to the external pagers if they aren't throttled...
1372 * vm_page_free_wanted represents the number of threads currently
1373 * blocked waiting for pages... we'll move one page for each of
1374 * these plus a fixed amount to break the logjam... once we're done
1375 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1376 * with a new timeout target since we have no way of knowing
1377 * whether we've broken the deadlock except through observation
1378 * of the queue associated with the default pager... we need to
1379 * stop moving pages and allow the system to run to see what
1380 * state it settles into.
1381 */
1382 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
1383 vm_pageout_scan_deadlock_detected++;
1384 flow_control.state = FCS_DEADLOCK_DETECTED;
1385
1386 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1387 goto consider_inactive;
1388 }
1389 /*
1390 * just resniff instead of trying
1391 * to compute a new delay time... we're going to be
1392 * awakened immediately upon a laundry completion,
1393 * so we won't wait any longer than necessary
1394 */
1395 msecs = vm_pageout_idle_wait;
1396 break;
1397
1398 case FCS_DEADLOCK_DETECTED:
1399 if (vm_pageout_deadlock_target)
1400 goto consider_inactive;
1401 goto reset_deadlock_timer;
1402
1403 }
1404 vm_pageout_scan_throttle++;
1405 iq->pgo_throttled = TRUE;
1406 vm_pageout_scan_delay:
1407 if (object != NULL) {
1408 vm_object_unlock(object);
1409 object = NULL;
1410 }
1411 if (local_freeq) {
1412 vm_page_free_list(local_freeq);
1413
1414 local_freeq = 0;
1415 local_freed = 0;
1416 }
1417 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1418
1419 counter(c_vm_pageout_scan_block++);
1420
1421 vm_page_unlock_queues();
1422
1423 thread_block(THREAD_CONTINUE_NULL);
1424
1425 vm_page_lock_queues();
1426 delayed_unlock = 1;
1427
1428 iq->pgo_throttled = FALSE;
1429
1430 if (loop_count >= vm_page_inactive_count) {
1431 if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
1432 /*
1433 * Make sure we move enough "appropriate"
1434 * pages to the inactive queue before trying
1435 * again.
1436 */
1437 need_internal_inactive = vm_pageout_inactive_relief;
1438 }
1439 loop_count = 0;
1440 }
1441 inactive_burst_count = 0;
1442
1443 goto Restart;
1444 /*NOTREACHED*/
1445 }
1446
1447
1448 flow_control.state = FCS_IDLE;
1449 consider_inactive:
1450 loop_count++;
1451 inactive_burst_count++;
1452 vm_pageout_inactive++;
1453
1454 if (!queue_empty(&vm_page_queue_inactive)) {
1455 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1456
1457 if (m->clustered && (m->no_isync == TRUE)) {
1458 goto use_this_page;
1459 }
1460 }
1461 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1462 vm_zf_iterator = 0;
1463 } else {
1464 last_page_zf = 0;
1465 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1466 vm_zf_iterator = 0;
1467 }
1468 }
1469 if (queue_empty(&vm_page_queue_zf) ||
1470 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1471 !queue_empty(&vm_page_queue_inactive))) {
1472 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1473 last_page_zf = 0;
1474 } else {
1475 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1476 last_page_zf = 1;
1477 }
1478 use_this_page:
1479 assert(!m->active && m->inactive);
1480 assert(!m->laundry);
1481 assert(m->object != kernel_object);
1482
1483 /*
1484 * Try to lock object; since we've already got the
1485 * page queues lock, we can only 'try' for this one.
1486 * if the 'try' fails, we need to do a mutex_pause
1487 * to allow the owner of the object lock a chance to
1488 * run... otherwise, we're likely to trip over this
1489 * object in the same state as we work our way through
1490 * the queue... clumps of pages associated with the same
1491 * object are fairly typical on the inactive and active queues
1492 */
1493 if (m->object != object) {
1494 if (object != NULL) {
1495 vm_object_unlock(object);
1496 object = NULL;
1497 }
1498 if (!vm_object_lock_try(m->object)) {
1499 /*
1500 * Move page to end and continue.
1501 * Don't re-issue ticket
1502 */
1503 if (m->zero_fill) {
1504 queue_remove(&vm_page_queue_zf, m,
1505 vm_page_t, pageq);
1506 queue_enter(&vm_page_queue_zf, m,
1507 vm_page_t, pageq);
1508 } else {
1509 queue_remove(&vm_page_queue_inactive, m,
1510 vm_page_t, pageq);
1511 queue_enter(&vm_page_queue_inactive, m,
1512 vm_page_t, pageq);
1513 }
1514 vm_pageout_inactive_nolock++;
1515
1516 /*
1517 * force us to dump any collected free pages
1518 * and to pause before moving on
1519 */
1520 delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;
1521
1522 goto done_with_inactivepage;
1523 }
1524 object = m->object;
1525 }
1526 /*
1527 * If the page belongs to a purgable object with no pending copies
1528 * against it, then we reap all of the pages in the object
1529 * and note that the object has been "emptied". It'll be up to the
1530 * application to discover this and recreate its contents if desired.
1531 */
1532 if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
1533 object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
1534 object->copy == VM_OBJECT_NULL) {
1535
1536 (void) vm_object_purge(object);
1537 vm_pageout_purged_objects++;
1538 /*
1539 * we've just taken all of the pages from this object,
1540 * so drop the lock now since we're not going to find
1541 * any more pages belonging to it anytime soon
1542 */
1543 vm_object_unlock(object);
1544 object = NULL;
1545
1546 inactive_burst_count = 0;
1547
1548 goto done_with_inactivepage;
1549 }
1550
1551 /*
1552 * Paging out pages of external objects which
1553 * are currently being created must be avoided.
1554 * The pager may need to claim memory, thus leading to a
1555 * possible deadlock between it and the pageout thread,
1556 * if such pages are finally chosen. The remaining assumption
1557 * is that there will finally be enough available pages in the
1558 * inactive pool to page out in order to satisfy all memory
1559 * claimed by the thread which concurrently creates the pager.
1560 */
1561 if (!object->pager_initialized && object->pager_created) {
1562 /*
1563 * Move page to end and continue, hoping that
1564 * there will be enough other inactive pages to
1565 * page out so that the thread which currently
1566 * initializes the pager will succeed.
1567 * Don't re-grant the ticket; the page should be
1568 * pulled from the queue and paged out whenever
1569 * one of its logically adjacent fellows is
1570 * targeted.
1571 */
1572 if (m->zero_fill) {
1573 queue_remove(&vm_page_queue_zf, m,
1574 vm_page_t, pageq);
1575 queue_enter(&vm_page_queue_zf, m,
1576 vm_page_t, pageq);
1577 last_page_zf = 1;
1578 vm_zf_iterator = vm_zf_iterator_count - 1;
1579 } else {
1580 queue_remove(&vm_page_queue_inactive, m,
1581 vm_page_t, pageq);
1582 queue_enter(&vm_page_queue_inactive, m,
1583 vm_page_t, pageq);
1584 last_page_zf = 0;
1585 vm_zf_iterator = 1;
1586 }
1587 vm_pageout_inactive_avoid++;
1588
1589 goto done_with_inactivepage;
1590 }
1591 /*
1592 * Remove the page from the inactive list.
1593 */
1594 if (m->zero_fill) {
1595 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1596 } else {
1597 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1598 }
1599 m->pageq.next = NULL;
1600 m->pageq.prev = NULL;
1601 m->inactive = FALSE;
1602 if (!m->fictitious)
1603 vm_page_inactive_count--;
1604
1605 if (m->busy || !object->alive) {
1606 /*
1607 * Somebody is already playing with this page.
1608 * Leave it off the pageout queues.
1609 */
1610 vm_pageout_inactive_busy++;
1611
1612 goto done_with_inactivepage;
1613 }
1614
1615 /*
1616 * If it's absent or in error, we can reclaim the page.
1617 */
1618
1619 if (m->absent || m->error) {
1620 vm_pageout_inactive_absent++;
1621 reclaim_page:
1622 if (vm_pageout_deadlock_target) {
1623 vm_pageout_scan_inactive_throttle_success++;
1624 vm_pageout_deadlock_target--;
1625 }
1626 if (m->tabled)
1627 vm_page_remove(m); /* clears tabled, object, offset */
1628 if (m->absent)
1629 vm_object_absent_release(object);
1630
1631 assert(m->pageq.next == NULL &&
1632 m->pageq.prev == NULL);
1633 m->pageq.next = (queue_entry_t)local_freeq;
1634 local_freeq = m;
1635 local_freed++;
1636
1637 inactive_burst_count = 0;
1638
1639 goto done_with_inactivepage;
1640 }
1641
1642 assert(!m->private);
1643 assert(!m->fictitious);
1644
1645 /*
1646 * If already cleaning this page in place, convert from
1647 * "adjacent" to "target". We can leave the page mapped,
1648 * and vm_pageout_object_terminate will determine whether
1649 * to free or reactivate.
1650 */
1651
1652 if (m->cleaning) {
1653 m->busy = TRUE;
1654 m->pageout = TRUE;
1655 m->dump_cleaning = TRUE;
1656 vm_page_wire(m);
1657
1658 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1659
1660 inactive_burst_count = 0;
1661
1662 goto done_with_inactivepage;
1663 }
1664
1665 /*
1666 * If it's being used, reactivate.
1667 * (Fictitious pages are either busy or absent.)
1668 */
1669 if ( (!m->reference) ) {
1670 refmod_state = pmap_get_refmod(m->phys_page);
1671
1672 if (refmod_state & VM_MEM_REFERENCED)
1673 m->reference = TRUE;
1674 if (refmod_state & VM_MEM_MODIFIED)
1675 m->dirty = TRUE;
1676 }
1677 if (m->reference) {
1678 was_referenced:
1679 vm_page_activate(m);
1680 VM_STAT(reactivations++);
1681
1682 vm_pageout_inactive_used++;
1683 last_page_zf = 0;
1684 inactive_burst_count = 0;
1685
1686 goto done_with_inactivepage;
1687 }
1688
1689 XPR(XPR_VM_PAGEOUT,
1690 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1691 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1692
1693 /*
1694 * we've got a candidate page to steal...
1695 *
1696 * m->dirty is up to date courtesy of the
1697 * preceding check for m->reference... if
1698 * we get here, then m->reference had to be
1699 * FALSE which means we did a pmap_get_refmod
1700 * and updated both m->reference and m->dirty
1701 *
1702 * if it's dirty or precious we need to
1703 * see if the target queue is throttled...
1704 * if it is, we need to skip over it by moving it back
1705 * to the end of the inactive queue
1706 */
1707 inactive_throttled = FALSE;
1708
1709 if (m->dirty || m->precious) {
1710 if (object->internal) {
1711 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1712 inactive_throttled = TRUE;
1713 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1714 inactive_throttled = TRUE;
1715 }
1716 }
1717 if (inactive_throttled == TRUE) {
1718 if (m->zero_fill) {
1719 queue_enter(&vm_page_queue_zf, m,
1720 vm_page_t, pageq);
1721 } else {
1722 queue_enter(&vm_page_queue_inactive, m,
1723 vm_page_t, pageq);
1724 }
1725 if (!m->fictitious)
1726 vm_page_inactive_count++;
1727 m->inactive = TRUE;
1728
1729 vm_pageout_scan_inactive_throttled++;
1730
1731 goto done_with_inactivepage;
1732 }
1733 /*
1734 * we've got a page that we can steal...
1735 * eliminate all mappings and make sure
1736 * we have the up-to-date modified state
1737 * first take the page BUSY, so that no new
1738 * mappings can be made
1739 */
1740 m->busy = TRUE;
1741
1742 /*
1743 * if we need to do a pmap_disconnect then we
1744 * need to re-evaluate m->dirty since the pmap_disconnect
1745 * provides the true state atomically... the
1746 * page was still mapped up to the pmap_disconnect
1747 * and may have been dirtied at the last microsecond
1748 *
1749 * we also check for the page being referenced 'late'
1750 * if it was, we first need to do a WAKEUP_DONE on it
1751 * since we already set m->busy = TRUE, before
1752 * going off to reactivate it
1753 *
1754 * if we don't need the pmap_disconnect, then
1755 * m->dirty is up to date courtesy of the
1756 * earlier check for m->reference... if
1757 * we get here, then m->reference had to be
1758 * FALSE which means we did a pmap_get_refmod
1759 * and updated both m->reference and m->dirty...
1760 */
1761 if (m->no_isync == FALSE) {
1762 refmod_state = pmap_disconnect(m->phys_page);
1763
1764 if (refmod_state & VM_MEM_MODIFIED)
1765 m->dirty = TRUE;
1766 if (refmod_state & VM_MEM_REFERENCED) {
1767 m->reference = TRUE;
1768
1769 PAGE_WAKEUP_DONE(m);
1770 goto was_referenced;
1771 }
1772 }
1773 /*
1774 * If it's clean and not precious, we can free the page.
1775 */
1776 if (!m->dirty && !m->precious) {
1777 vm_pageout_inactive_clean++;
1778 goto reclaim_page;
1779 }
1780 vm_pageout_cluster(m);
1781
1782 vm_pageout_inactive_dirty++;
1783
1784 inactive_burst_count = 0;
1785
1786 done_with_inactivepage:
1787 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1788
1789 if (object != NULL) {
1790 vm_object_unlock(object);
1791 object = NULL;
1792 }
1793 if (local_freeq) {
1794 vm_page_free_list(local_freeq);
1795
1796 local_freeq = 0;
1797 local_freed = 0;
1798 }
1799 delayed_unlock = 0;
1800 vm_page_unlock_queues();
1801 mutex_pause();
1802 }
1803 /*
1804 * back to top of pageout scan loop
1805 */
1806 }
1807 }
1808
1809
1810 int vm_page_free_count_init;
1811
1812 void
1813 vm_page_free_reserve(
1814 int pages)
1815 {
1816 int free_after_reserve;
1817
1818 vm_page_free_reserved += pages;
1819
1820 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1821
1822 vm_page_free_min = vm_page_free_reserved +
1823 VM_PAGE_FREE_MIN(free_after_reserve);
1824
1825 vm_page_free_target = vm_page_free_reserved +
1826 VM_PAGE_FREE_TARGET(free_after_reserve);
1827
1828 if (vm_page_free_target < vm_page_free_min + 5)
1829 vm_page_free_target = vm_page_free_min + 5;
1830 }
1831
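/*
 * A rough worked example with purely illustrative numbers: if
 * vm_page_free_count_init is 100000 pages and the accumulated reserve
 * is 100 pages, then free_after_reserve = 99900 and
 *
 *	vm_page_free_min    = 100 + (10 + 99900 / 100) = 1109 pages
 *	vm_page_free_target = 100 + (15 + 99900 / 80)  = 1363 pages
 *
 * so the target already clears the "min + 5" floor enforced above.
 */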
1832 /*
1833 * vm_pageout is the high level pageout daemon.
1834 */
1835
1836 void
1837 vm_pageout_continue(void)
1838 {
1839 vm_pageout_scan_event_counter++;
1840 vm_pageout_scan();
1841 /* we hold vm_page_queue_free_lock now */
1842 assert(vm_page_free_wanted == 0);
1843 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1844 mutex_unlock(&vm_page_queue_free_lock);
1845
1846 counter(c_vm_pageout_block++);
1847 thread_block((thread_continue_t)vm_pageout_continue);
1848 /*NOTREACHED*/
1849 }
1850
1851
1852 /*
1853 * must be called with the
1854 * queues and object locks held
1855 */
1856 static void
1857 vm_pageout_queue_steal(vm_page_t m)
1858 {
1859 struct vm_pageout_queue *q;
1860
1861 if (m->object->internal == TRUE)
1862 q = &vm_pageout_queue_internal;
1863 else
1864 q = &vm_pageout_queue_external;
1865
1866 m->laundry = FALSE;
1867 m->pageout_queue = FALSE;
1868 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
1869
1870 m->pageq.next = NULL;
1871 m->pageq.prev = NULL;
1872
1873 vm_object_paging_end(m->object);
1874
1875 q->pgo_laundry--;
1876 }
1877
1878
1879 #ifdef FAKE_DEADLOCK
1880
1881 #define FAKE_COUNT 5000
1882
1883 int internal_count = 0;
1884 int fake_deadlock = 0;
1885
1886 #endif
1887
1888 static void
1889 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
1890 {
1891 vm_page_t m = NULL;
1892 vm_object_t object;
1893 boolean_t need_wakeup;
1894
1895 vm_page_lock_queues();
1896
1897 while ( !queue_empty(&q->pgo_pending) ) {
1898
1899 q->pgo_busy = TRUE;
1900 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
1901 m->pageout_queue = FALSE;
1902 vm_page_unlock_queues();
1903
1904 m->pageq.next = NULL;
1905 m->pageq.prev = NULL;
1906 #ifdef FAKE_DEADLOCK
1907 if (q == &vm_pageout_queue_internal) {
1908 vm_offset_t addr;
1909 int pg_count;
1910
1911 internal_count++;
1912
1913 if ((internal_count == FAKE_COUNT)) {
1914
1915 pg_count = vm_page_free_count + vm_page_free_reserved;
1916
1917 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
1918 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
1919 }
1920 internal_count = 0;
1921 fake_deadlock++;
1922 }
1923 }
1924 #endif
1925 object = m->object;
1926
1927 if (!object->pager_initialized) {
1928 vm_object_lock(object);
1929
1930 /*
1931 * If there is no memory object for the page, create
1932 * one and hand it to the default pager.
1933 */
1934
1935 if (!object->pager_initialized)
1936 vm_object_collapse(object, (vm_object_offset_t)0);
1937 if (!object->pager_initialized)
1938 vm_object_pager_create(object);
1939 if (!object->pager_initialized) {
1940 /*
1941 * Still no pager for the object.
1942 * Reactivate the page.
1943 *
1944 * Should only happen if there is no
1945 * default pager.
1946 */
1947 m->list_req_pending = FALSE;
1948 m->cleaning = FALSE;
1949 m->pageout = FALSE;
1950 vm_page_unwire(m);
1951
1952 vm_pageout_throttle_up(m);
1953
1954 vm_page_lock_queues();
1955 vm_pageout_dirty_no_pager++;
1956 vm_page_activate(m);
1957 vm_page_unlock_queues();
1958
1959 /*
1960 * And we are done with it.
1961 */
1962 PAGE_WAKEUP_DONE(m);
1963
1964 vm_object_paging_end(object);
1965 vm_object_unlock(object);
1966
1967 vm_page_lock_queues();
1968 continue;
1969 } else if (object->pager == MEMORY_OBJECT_NULL) {
1970 /*
1971 * This pager has been destroyed by either
1972 * memory_object_destroy or vm_object_destroy, and
1973 * so there is nowhere for the page to go.
1974 * Just free the page... VM_PAGE_FREE takes
1975 * care of cleaning up all the state...
1976 * including doing the vm_pageout_throttle_up
1977 */
1978 VM_PAGE_FREE(m);
1979
1980 vm_object_paging_end(object);
1981 vm_object_unlock(object);
1982
1983 vm_page_lock_queues();
1984 continue;
1985 }
1986 vm_object_unlock(object);
1987 }
1988 /*
1989 * we expect the paging_in_progress reference to have
1990 * already been taken on the object before it was added
1991 * to the appropriate pageout I/O queue... this will
1992 * keep the object from being terminated and/or the
1993 * paging_offset from changing until the I/O has
1994 * completed... therefore no need to lock the object to
1995 * pull the paging_offset from it.
1996 *
1997 * Send the data to the pager.
1998 * Any pageout clustering happens there
1999 */
2000 memory_object_data_return(object->pager,
2001 m->offset + object->paging_offset,
2002 PAGE_SIZE,
2003 NULL,
2004 NULL,
2005 FALSE,
2006 FALSE,
2007 0);
2008
2009 vm_object_lock(object);
2010 vm_object_paging_end(object);
2011 vm_object_unlock(object);
2012
2013 vm_page_lock_queues();
2014 }
2015 assert_wait((event_t) q, THREAD_UNINT);
2016
2017
2018 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2019 q->pgo_throttled = FALSE;
2020 need_wakeup = TRUE;
2021 } else
2022 need_wakeup = FALSE;
2023
2024 q->pgo_busy = FALSE;
2025 q->pgo_idle = TRUE;
2026 vm_page_unlock_queues();
2027
2028 if (need_wakeup == TRUE)
2029 thread_wakeup((event_t) &q->pgo_laundry);
2030
2031 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2032 /*NOTREACHED*/
2033 }
2034
2035
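/*
 * Startup entry points for the two pageout I/O threads.  Each simply
 * enters vm_pageout_iothread_continue() on its queue; the internal
 * thread first sets TH_OPT_VMPRIV on itself.
 */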
2036 static void
2037 vm_pageout_iothread_external(void)
2038 {
2039
2040 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2041 /*NOTREACHED*/
2042 }
2043
2044
2045 static void
2046 vm_pageout_iothread_internal(void)
2047 {
2048 thread_t self = current_thread();
2049
2050 self->options |= TH_OPT_VMPRIV;
2051
2052 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2053 /*NOTREACHED*/
2054 }
2055
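/*
 *	vm_pageout_garbage_collect:
 *
 *	Kernel garbage-collection thread.  When woken with a non-zero
 *	argument it reclaims unused kernel stacks, runs the
 *	machine-dependent collectors and the zone garbage collector,
 *	then blocks on its own event with this routine as the
 *	continuation.
 */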
2056 static void
2057 vm_pageout_garbage_collect(int collect)
2058 {
2059 if (collect) {
2060 stack_collect();
2061
2062 /*
2063 * consider_zone_gc should be last, because the other operations
2064 * might return memory to zones.
2065 */
2066 consider_machine_collect();
2067 consider_zone_gc();
2068
2069 consider_machine_adjust();
2070 }
2071
2072 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2073
2074 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2075 /*NOTREACHED*/
2076 }
2077
2078
2079
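/*
 *	vm_pageout:
 *
 *	Entry point of the pageout daemon.  Raises the thread's
 *	priority, fills in any pageout tuning parameters still at zero,
 *	marks the kernel task backing-store privileged, sizes the free
 *	page reserve, initializes the external and internal pageout
 *	queues, starts the two I/O threads and the garbage-collection
 *	thread, and finally enters vm_pageout_continue().
 */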
2080 void
2081 vm_pageout(void)
2082 {
2083 thread_t self = current_thread();
2084 thread_t thread;
2085 kern_return_t result;
2086 spl_t s;
2087
2088 /*
2089 * Set thread privileges.
2090 */
2091 s = splsched();
2092 thread_lock(self);
2093 self->priority = BASEPRI_PREEMPT - 1;
2094 set_sched_pri(self, self->priority);
2095 thread_unlock(self);
2096 splx(s);
2097
2098 /*
2099 * Initialize some paging parameters.
2100 */
2101
2102 if (vm_pageout_idle_wait == 0)
2103 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2104
2105 if (vm_pageout_burst_wait == 0)
2106 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2107
2108 if (vm_pageout_empty_wait == 0)
2109 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2110
2111 if (vm_pageout_deadlock_wait == 0)
2112 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2113
2114 if (vm_pageout_deadlock_relief == 0)
2115 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2116
2117 if (vm_pageout_inactive_relief == 0)
2118 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2119
2120 if (vm_pageout_burst_active_throttle == 0)
2121 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2122
2123 if (vm_pageout_burst_inactive_throttle == 0)
2124 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2125
2126 /*
2127 * Set kernel task to low backing store privileged
2128 * status
2129 */
2130 task_lock(kernel_task);
2131 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2132 task_unlock(kernel_task);
2133
2134 vm_page_free_count_init = vm_page_free_count;
2135 vm_zf_iterator = 0;
2136 /*
2137 * even if we've already called vm_page_free_reserve,
2138 * call it again here to ensure that the targets are
2139 * accurately calculated (it uses vm_page_free_count_init);
2140 * calling it with an arg of 0 will not change the reserve
2141 * but will re-calculate free_min and free_target
2142 */
2143 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2144 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
2145 } else
2146 vm_page_free_reserve(0);
2147
2148
2149 queue_init(&vm_pageout_queue_external.pgo_pending);
2150 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2151 vm_pageout_queue_external.pgo_laundry = 0;
2152 vm_pageout_queue_external.pgo_idle = FALSE;
2153 vm_pageout_queue_external.pgo_busy = FALSE;
2154 vm_pageout_queue_external.pgo_throttled = FALSE;
2155
2156 queue_init(&vm_pageout_queue_internal.pgo_pending);
2157 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2158 vm_pageout_queue_internal.pgo_laundry = 0;
2159 vm_pageout_queue_internal.pgo_idle = FALSE;
2160 vm_pageout_queue_internal.pgo_busy = FALSE;
2161 vm_pageout_queue_internal.pgo_throttled = FALSE;
2162
2163
2164 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
2165 if (result != KERN_SUCCESS)
2166 panic("vm_pageout_iothread_internal: create failed");
2167
2168 thread_deallocate(thread);
2169
2170
2171 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
2172 if (result != KERN_SUCCESS)
2173 panic("vm_pageout_iothread_external: create failed");
2174
2175 thread_deallocate(thread);
2176
2177
2178 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
2179 if (result != KERN_SUCCESS)
2180 panic("vm_pageout_garbage_collect: create failed");
2181
2182 thread_deallocate(thread);
2183
2184
2185 vm_pageout_continue();
2186 /*NOTREACHED*/
2187 }
2188
2189
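/*
 * Allocate and initialize a upl.  For UPL_CREATE_INTERNAL the
 * upl_page_info array is allocated inline, immediately after the upl
 * structure; for UPL_CREATE_LITE a bitmap with one bit per page
 * (rounded up to a 4-byte boundary) is appended as well.
 */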
2190 static upl_t
2191 upl_create(
2192 int flags,
2193 upl_size_t size)
2194 {
2195 upl_t upl;
2196 int page_field_size; /* bit field in word size buf */
2197
2198 page_field_size = 0;
2199 if (flags & UPL_CREATE_LITE) {
2200 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2201 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2202 }
2203 if(flags & UPL_CREATE_INTERNAL) {
2204 upl = (upl_t)kalloc(sizeof(struct upl)
2205 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2206 + page_field_size);
2207 } else {
2208 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
2209 }
2210 upl->flags = 0;
2211 upl->src_object = NULL;
2212 upl->kaddr = (vm_offset_t)0;
2213 upl->size = 0;
2214 upl->map_object = NULL;
2215 upl->ref_count = 1;
2216 upl_lock_init(upl);
2217 #ifdef UPL_DEBUG
2218 upl->ubc_alias1 = 0;
2219 upl->ubc_alias2 = 0;
2220 #endif /* UPL_DEBUG */
2221 return(upl);
2222 }
2223
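/*
 * Tear down a upl whose last reference has been dropped: unlink it
 * from its object's UPL queue (UPL_DEBUG only), release the reference
 * held on a pageout map_object, and free the upl along with any
 * inline page list and lite bitmap.
 */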
2224 static void
2225 upl_destroy(
2226 upl_t upl)
2227 {
2228 int page_field_size; /* bit field in word size buf */
2229
2230 #ifdef UPL_DEBUG
2231 {
2232 upl_t upl_ele;
2233 vm_object_t object;
2234 if (upl->map_object->pageout) {
2235 object = upl->map_object->shadow;
2236 } else {
2237 object = upl->map_object;
2238 }
2239 vm_object_lock(object);
2240 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
2241 if(upl_ele == upl) {
2242 queue_remove(&object->uplq,
2243 upl_ele, upl_t, uplq);
2244 break;
2245 }
2246 }
2247 vm_object_unlock(object);
2248 }
2249 #endif /* UPL_DEBUG */
2250 /* drop a reference on the map_object whether or */
2251 /* not a pageout object is inserted */
2252 if(upl->map_object->pageout)
2253 vm_object_deallocate(upl->map_object);
2254
2255 page_field_size = 0;
2256 if (upl->flags & UPL_LITE) {
2257 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2258 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2259 }
2260 if(upl->flags & UPL_INTERNAL) {
2261 kfree(upl,
2262 sizeof(struct upl) +
2263 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2264 + page_field_size);
2265 } else {
2266 kfree(upl, sizeof(struct upl) + page_field_size);
2267 }
2268 }
2269
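/*
 * Drop a reference on a upl and destroy it when the count reaches
 * zero.  uc_upl_dealloc() and upl_deallocate() are identical in
 * effect.
 */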
2270 void uc_upl_dealloc(upl_t upl);
2271 __private_extern__ void
2272 uc_upl_dealloc(
2273 upl_t upl)
2274 {
2275 upl->ref_count -= 1;
2276 if(upl->ref_count == 0) {
2277 upl_destroy(upl);
2278 }
2279 }
2280
2281 void
2282 upl_deallocate(
2283 upl_t upl)
2284 {
2285
2286 upl->ref_count -= 1;
2287 if(upl->ref_count == 0) {
2288 upl_destroy(upl);
2289 }
2290 }
2291
2292 /*
2293 * Statistics about UPL enforcement of copy-on-write obligations.
2294 */
2295 unsigned long upl_cow = 0;
2296 unsigned long upl_cow_again = 0;
2297 unsigned long upl_cow_contiguous = 0;
2298 unsigned long upl_cow_pages = 0;
2299 unsigned long upl_cow_again_pages = 0;
2300 unsigned long upl_cow_contiguous_pages = 0;
2301
2302 /*
2303 * Routine: vm_object_upl_request
2304 * Purpose:
2305 * Cause the population of a portion of a vm_object.
2306 * Depending on the nature of the request, the pages
2307 * returned may contain valid data or be uninitialized.
2308 * A page list structure, listing the physical pages
2309 * will be returned upon request.
2310 * This function is called by the file system or any other
2311 * supplier of backing store to a pager.
2312 * IMPORTANT NOTE: The caller must still respect the relationship
2313 * between the vm_object and its backing memory object. The
2314 * caller MUST NOT substitute changes in the backing file
2315 * without first doing a memory_object_lock_request on the
2316 * target range unless it is known that the pages are not
2317 * shared with another entity at the pager level.
2318 * Copy_in_to:
2319 * if a page list structure is present
2320 * return the mapped physical pages, where a
2321 * page is not present, return a non-initialized
2322 * one. If the no_sync bit is turned on, don't
2323 * call the pager unlock to synchronize with other
2324 * possible copies of the page. Leave pages busy
2325 * in the original object, if a page list structure
2326 * was specified. When a commit of the page list
2327 * pages is done, the dirty bit will be set for each one.
2328 * Copy_out_from:
2329 * If a page list structure is present, return
2330 * all mapped pages. Where a page does not exist
2331 * map a zero filled one. Leave pages busy in
2332 * the original object. If a page list structure
2333 * is not specified, this call is a no-op.
2334 *
2335 * Note: access of default pager objects has a rather interesting
2336 * twist. The caller of this routine, presumably the file system
2337 * page cache handling code, will never actually make a request
2338 * against a default pager backed object. Only the default
2339 * pager will make requests on backing store related vm_objects.
2340 * In this way the default pager can maintain the relationship
2341 * between backing store files (abstract memory objects) and
2342 * the vm_objects (cache objects) they support.
2343 *
2344 */
2345
2346 __private_extern__ kern_return_t
2347 vm_object_upl_request(
2348 vm_object_t object,
2349 vm_object_offset_t offset,
2350 upl_size_t size,
2351 upl_t *upl_ptr,
2352 upl_page_info_array_t user_page_list,
2353 unsigned int *page_list_count,
2354 int cntrl_flags)
2355 {
2356 vm_page_t dst_page = VM_PAGE_NULL;
2357 vm_object_offset_t dst_offset = offset;
2358 upl_size_t xfer_size = size;
2359 boolean_t do_m_lock = FALSE;
2360 boolean_t dirty;
2361 boolean_t hw_dirty;
2362 upl_t upl = NULL;
2363 unsigned int entry;
2364 #if MACH_CLUSTER_STATS
2365 boolean_t encountered_lrp = FALSE;
2366 #endif
2367 vm_page_t alias_page = NULL;
2368 int page_ticket;
2369 int refmod_state;
2370 wpl_array_t lite_list = NULL;
2371 vm_object_t last_copy_object;
2372
2373
2374 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2375 /*
2376 * For forward compatibility's sake,
2377 * reject any unknown flag.
2378 */
2379 return KERN_INVALID_VALUE;
2380 }
2381
2382 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2383 >> UPL_PAGE_TICKET_SHIFT;
2384
2385 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2386 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2387 }
2388
2389 if(cntrl_flags & UPL_SET_INTERNAL)
2390 if(page_list_count != NULL)
2391 *page_list_count = MAX_UPL_TRANSFER;
2392
2393 if((!object->internal) && (object->paging_offset != 0))
2394 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2395
2396 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2397 return KERN_SUCCESS;
2398 }
2399
2400 vm_object_lock(object);
2401 vm_object_paging_begin(object);
2402 vm_object_unlock(object);
2403
2404 if(upl_ptr) {
2405 if(cntrl_flags & UPL_SET_INTERNAL) {
2406 if(cntrl_flags & UPL_SET_LITE) {
2407 uintptr_t page_field_size;
2408 upl = upl_create(
2409 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2410 size);
2411 user_page_list = (upl_page_info_t *)
2412 (((uintptr_t)upl) + sizeof(struct upl));
2413 lite_list = (wpl_array_t)
2414 (((uintptr_t)user_page_list) +
2415 ((size/PAGE_SIZE) *
2416 sizeof(upl_page_info_t)));
2417 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2418 page_field_size =
2419 (page_field_size + 3) & 0xFFFFFFFC;
2420 bzero((char *)lite_list, page_field_size);
2421 upl->flags =
2422 UPL_LITE | UPL_INTERNAL;
2423 } else {
2424 upl = upl_create(UPL_CREATE_INTERNAL, size);
2425 user_page_list = (upl_page_info_t *)
2426 (((uintptr_t)upl) + sizeof(struct upl));
2427 upl->flags = UPL_INTERNAL;
2428 }
2429 } else {
2430 if(cntrl_flags & UPL_SET_LITE) {
2431 uintptr_t page_field_size;
2432 upl = upl_create(UPL_CREATE_LITE, size);
2433 lite_list = (wpl_array_t)
2434 (((uintptr_t)upl) + sizeof(struct upl));
2435 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2436 page_field_size =
2437 (page_field_size + 3) & 0xFFFFFFFC;
2438 bzero((char *)lite_list, page_field_size);
2439 upl->flags = UPL_LITE;
2440 } else {
2441 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2442 upl->flags = 0;
2443 }
2444 }
2445
2446 if (object->phys_contiguous) {
2447 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2448 object->copy != VM_OBJECT_NULL) {
2449 /* Honor copy-on-write obligations */
2450
2451 /*
2452 * XXX FBDP
2453 * We could still have a race...
2454 * A is here building the UPL for a write().
2455 * A pushes the pages to the current copy
2456 * object.
2457 * A returns the UPL to the caller.
2458 * B comes along and establishes another
2459 * private mapping on this object, inserting
2460 * a new copy object between the original
2461 * object and the old copy object.
2462 * B reads a page and gets the original contents
2463 * from the original object.
2464 * A modifies the page in the original object.
2465 * B reads the page again and sees A's changes,
2466 * which is wrong...
2467 *
2468 * The problem is that the pages are not
2469 * marked "busy" in the original object, so
2470 * nothing prevents B from reading it
2471 * before A's changes are completed.
2472 *
2473 * The "paging_in_progress" might protect us
2474 * from the insertion of a new copy object
2475 * though... To be verified.
2476 */
2477 vm_object_lock_request(object,
2478 offset,
2479 size,
2480 FALSE,
2481 MEMORY_OBJECT_COPY_SYNC,
2482 VM_PROT_NO_CHANGE);
2483 upl_cow_contiguous++;
2484 upl_cow_contiguous_pages += size >> PAGE_SHIFT;
2485 }
2486
2487 upl->map_object = object;
2488 /* don't need any shadow mappings for this one */
2489 /* since it is already I/O memory */
2490 upl->flags |= UPL_DEVICE_MEMORY;
2491
2492
2493 /* paging_in_progress protects paging_offset */
2494 upl->offset = offset + object->paging_offset;
2495 upl->size = size;
2496 *upl_ptr = upl;
2497 if(user_page_list) {
2498 user_page_list[0].phys_addr =
2499 (offset + object->shadow_offset)>>PAGE_SHIFT;
2500 user_page_list[0].device = TRUE;
2501 }
2502
2503 if(page_list_count != NULL) {
2504 if (upl->flags & UPL_INTERNAL) {
2505 *page_list_count = 0;
2506 } else {
2507 *page_list_count = 1;
2508 }
2509 }
2510
2511 return KERN_SUCCESS;
2512 }
2513
2514 if(user_page_list)
2515 user_page_list[0].device = FALSE;
2516
2517 if(cntrl_flags & UPL_SET_LITE) {
2518 upl->map_object = object;
2519 } else {
2520 upl->map_object = vm_object_allocate(size);
2521 /*
2522 * No need to lock the new object: nobody else knows
2523 * about it yet, so it's all ours so far.
2524 */
2525 upl->map_object->shadow = object;
2526 upl->map_object->pageout = TRUE;
2527 upl->map_object->can_persist = FALSE;
2528 upl->map_object->copy_strategy =
2529 MEMORY_OBJECT_COPY_NONE;
2530 upl->map_object->shadow_offset = offset;
2531 upl->map_object->wimg_bits = object->wimg_bits;
2532 }
2533
2534 }
2535 if (!(cntrl_flags & UPL_SET_LITE)) {
2536 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2537 }
2538
2539 /*
2540 * ENCRYPTED SWAP:
2541 * Just mark the UPL as "encrypted" here.
2542 * We'll actually encrypt the pages later,
2543 * in upl_encrypt(), when the caller has
2544 * selected which pages need to go to swap.
2545 */
2546 if (cntrl_flags & UPL_ENCRYPT) {
2547 upl->flags |= UPL_ENCRYPTED;
2548 }
2549 if (cntrl_flags & UPL_FOR_PAGEOUT) {
2550 upl->flags |= UPL_PAGEOUT;
2551 }
2552 vm_object_lock(object);
2553
2554 /* we can lock in the paging_offset once paging_in_progress is set */
2555 if(upl_ptr) {
2556 upl->size = size;
2557 upl->offset = offset + object->paging_offset;
2558 *upl_ptr = upl;
2559 #ifdef UPL_DEBUG
2560 queue_enter(&object->uplq, upl, upl_t, uplq);
2561 #endif /* UPL_DEBUG */
2562 }
2563
2564 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2565 object->copy != VM_OBJECT_NULL) {
2566 /* Honor copy-on-write obligations */
2567
2568 /*
2569 * The caller is gathering these pages and
2570 * might modify their contents. We need to
2571 * make sure that the copy object has its own
2572 * private copies of these pages before we let
2573 * the caller modify them.
2574 */
2575 vm_object_update(object,
2576 offset,
2577 size,
2578 NULL,
2579 NULL,
2580 FALSE, /* should_return */
2581 MEMORY_OBJECT_COPY_SYNC,
2582 VM_PROT_NO_CHANGE);
2583 upl_cow++;
2584 upl_cow_pages += size >> PAGE_SHIFT;
2585
2586 }
2587 /* remember which copy object we synchronized with */
2588 last_copy_object = object->copy;
2589
2590 entry = 0;
2591 if(cntrl_flags & UPL_COPYOUT_FROM) {
2592 upl->flags |= UPL_PAGE_SYNC_DONE;
2593
2594 while (xfer_size) {
2595 if((alias_page == NULL) &&
2596 !(cntrl_flags & UPL_SET_LITE)) {
2597 vm_object_unlock(object);
2598 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2599 vm_object_lock(object);
2600 }
2601 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
2602 dst_page->fictitious ||
2603 dst_page->absent ||
2604 dst_page->error ||
2605 (dst_page->wire_count && !dst_page->pageout) ||
2606
2607 ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2608 (dst_page->page_ticket != page_ticket) &&
2609 ((dst_page->page_ticket+1) != page_ticket)) ) {
2610
2611 if (user_page_list)
2612 user_page_list[entry].phys_addr = 0;
2613 } else {
2614 /*
2615 * grab this up front...
2616 * a high percentage of the time we're going to
2617 * need the hardware modification state a bit later
2618 * anyway... so we can eliminate an extra call into
2619 * the pmap layer by grabbing it here and recording it
2620 */
2621 refmod_state = pmap_get_refmod(dst_page->phys_page);
2622
2623 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2624 /*
2625 * we're only asking for DIRTY pages to be returned
2626 */
2627
2628 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2629 /*
2630 * if this is the page stolen by vm_pageout_scan to be
2631 * cleaned (as opposed to a buddy being clustered in),
2632 * or this request is not being driven by a PAGEOUT cluster,
2633 * then we only need to check for the page being dirty or
2634 * precious to decide whether to return it
2635 */
2636 if (dst_page->dirty || dst_page->precious ||
2637 (refmod_state & VM_MEM_MODIFIED)) {
2638 goto check_busy;
2639 }
2640 }
2641 /*
2642 * this is a request for a PAGEOUT cluster and this page
2643 * is merely along for the ride as a 'buddy'... not only
2644 * does it have to be dirty to be returned, but it also
2645 * can't have been referenced recently... note that we've
2646 * already filtered above based on whether this page is
2647 * currently on the inactive queue or it meets the page
2648 * ticket (generation count) check
2649 */
2650 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2651 ((refmod_state & VM_MEM_MODIFIED) ||
2652 dst_page->dirty || dst_page->precious) ) {
2653 goto check_busy;
2654 }
2655 /*
2656 * if we reach here, we're not to return
2657 * the page... go on to the next one
2658 */
2659 if (user_page_list)
2660 user_page_list[entry].phys_addr = 0;
2661 entry++;
2662 dst_offset += PAGE_SIZE_64;
2663 xfer_size -= PAGE_SIZE;
2664 continue;
2665 }
2666 check_busy:
2667 if(dst_page->busy &&
2668 (!(dst_page->list_req_pending &&
2669 dst_page->pageout))) {
2670 if(cntrl_flags & UPL_NOBLOCK) {
2671 if(user_page_list) {
2672 user_page_list[entry].phys_addr = 0;
2673 }
2674 entry++;
2675 dst_offset += PAGE_SIZE_64;
2676 xfer_size -= PAGE_SIZE;
2677 continue;
2678 }
2679 /*
2680 * someone else is playing with the
2681 * page. We will have to wait.
2682 */
2683 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2684 continue;
2685 }
2686 /* Someone else already cleaning the page? */
2687 if((dst_page->cleaning || dst_page->absent ||
2688 dst_page->wire_count != 0) &&
2689 !dst_page->list_req_pending) {
2690 if(user_page_list) {
2691 user_page_list[entry].phys_addr = 0;
2692 }
2693 entry++;
2694 dst_offset += PAGE_SIZE_64;
2695 xfer_size -= PAGE_SIZE;
2696 continue;
2697 }
2698 /* eliminate all mappings from the */
2699 /* original object and its progeny */
2700
2701 vm_page_lock_queues();
2702
2703 if (dst_page->pageout_queue == TRUE)
2704 /*
2705 * we've buddied up a page for a clustered pageout
2706 * that has already been moved to the pageout
2707 * queue by pageout_scan... we need to remove
2708 * it from the queue and drop the laundry count
2709 * on that queue
2710 */
2711 vm_pageout_queue_steal(dst_page);
2712 #if MACH_CLUSTER_STATS
2713 /* pageout statistics gathering. count */
2714 /* all the pages we will page out that */
2715 /* were not counted in the initial */
2716 /* vm_pageout_scan work */
2717 if(dst_page->list_req_pending)
2718 encountered_lrp = TRUE;
2719 if((dst_page->dirty ||
2720 (dst_page->object->internal &&
2721 dst_page->precious)) &&
2722 (dst_page->list_req_pending
2723 == FALSE)) {
2724 if(encountered_lrp) {
2725 CLUSTER_STAT
2726 (pages_at_higher_offsets++;)
2727 } else {
2728 CLUSTER_STAT
2729 (pages_at_lower_offsets++;)
2730 }
2731 }
2732 #endif
2733 /* Turn off busy indication on pending */
2734 /* pageout. Note: we can only get here */
2735 /* in the request pending case. */
2736 dst_page->list_req_pending = FALSE;
2737 dst_page->busy = FALSE;
2738 dst_page->cleaning = FALSE;
2739
2740 hw_dirty = refmod_state & VM_MEM_MODIFIED;
2741 dirty = hw_dirty ? TRUE : dst_page->dirty;
2742
2743 if(cntrl_flags & UPL_SET_LITE) {
2744 int pg_num;
2745 pg_num = (dst_offset-offset)/PAGE_SIZE;
2746 lite_list[pg_num>>5] |=
2747 1 << (pg_num & 31);
2748 if (hw_dirty)
2749 pmap_clear_modify(dst_page->phys_page);
2750 /*
2751 * Record that this page has been
2752 * written out
2753 */
2754 #if MACH_PAGEMAP
2755 vm_external_state_set(
2756 object->existence_map,
2757 dst_page->offset);
2758 #endif /*MACH_PAGEMAP*/
2759
2760 /*
2761 * Mark original page as cleaning
2762 * in place.
2763 */
2764 dst_page->cleaning = TRUE;
2765 dst_page->dirty = TRUE;
2766 dst_page->precious = FALSE;
2767 } else {
2768 /* use pageclean setup, it is more */
2769 /* convenient even for the pageout */
2770 /* cases here */
2771
2772 vm_object_lock(upl->map_object);
2773 vm_pageclean_setup(dst_page,
2774 alias_page, upl->map_object,
2775 size - xfer_size);
2776 vm_object_unlock(upl->map_object);
2777
2778 alias_page->absent = FALSE;
2779 alias_page = NULL;
2780 }
2781
2782 if(!dirty) {
2783 dst_page->dirty = FALSE;
2784 dst_page->precious = TRUE;
2785 }
2786
2787 if(dst_page->pageout)
2788 dst_page->busy = TRUE;
2789
2790 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2791 /*
2792 * ENCRYPTED SWAP:
2793 * We want to deny access to the target page
2794 * because its contents are about to be
2795 * encrypted and the user would be very
2796 * confused to see encrypted data instead
2797 * of their data.
2798 */
2799 dst_page->busy = TRUE;
2800 }
2801 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2802 /*
2803 * deny access to the target page
2804 * while it is being worked on
2805 */
2806 if ((!dst_page->pageout) &&
2807 (dst_page->wire_count == 0)) {
2808 dst_page->busy = TRUE;
2809 dst_page->pageout = TRUE;
2810 vm_page_wire(dst_page);
2811 }
2812 }
2813
2814 if(user_page_list) {
2815 user_page_list[entry].phys_addr
2816 = dst_page->phys_page;
2817 user_page_list[entry].dirty =
2818 dst_page->dirty;
2819 user_page_list[entry].pageout =
2820 dst_page->pageout;
2821 user_page_list[entry].absent =
2822 dst_page->absent;
2823 user_page_list[entry].precious =
2824 dst_page->precious;
2825 }
2826 vm_page_unlock_queues();
2827
2828 /*
2829 * ENCRYPTED SWAP:
2830 * The caller is gathering this page and might
2831 * access its contents later on. Decrypt the
2832 * page before adding it to the UPL, so that
2833 * the caller never sees encrypted data.
2834 */
2835 if (! (cntrl_flags & UPL_ENCRYPT) &&
2836 dst_page->encrypted) {
2837 assert(dst_page->busy);
2838
2839 vm_page_decrypt(dst_page, 0);
2840 vm_page_decrypt_for_upl_counter++;
2841
2842 /*
2843 * Retry this page, since anything
2844 * could have changed while we were
2845 * decrypting.
2846 */
2847 continue;
2848 }
2849 }
2850 entry++;
2851 dst_offset += PAGE_SIZE_64;
2852 xfer_size -= PAGE_SIZE;
2853 }
2854 } else {
2855 while (xfer_size) {
2856 if((alias_page == NULL) &&
2857 !(cntrl_flags & UPL_SET_LITE)) {
2858 vm_object_unlock(object);
2859 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2860 vm_object_lock(object);
2861 }
2862
2863 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2864 object->copy != last_copy_object) {
2865 /* Honor copy-on-write obligations */
2866
2867 /*
2868 * The copy object has changed since we
2869 * last synchronized for copy-on-write.
2870 * Another copy object might have been
2871 * inserted while we released the object's
2872 * lock. Since someone could have seen the
2873 * original contents of the remaining pages
2874 * through that new object, we have to
2875 * synchronize with it again for the remaining
2876 * pages only. The previous pages are "busy"
2877 * so they can not be seen through the new
2878 * mapping. The new mapping will see our
2879 * upcoming changes for those previous pages,
2880 * but that's OK since they couldn't see what
2881 * was there before. It's just a race anyway
2882 * and there's no guarantee of consistency or
2883 * atomicity. We just don't want new mappings
2884 * to see both the *before* and *after* pages.
2885 */
2886 if (object->copy != VM_OBJECT_NULL) {
2887 vm_object_update(
2888 object,
2889 dst_offset,/* current offset */
2890 xfer_size, /* remaining size */
2891 NULL,
2892 NULL,
2893 FALSE, /* should_return */
2894 MEMORY_OBJECT_COPY_SYNC,
2895 VM_PROT_NO_CHANGE);
2896 upl_cow_again++;
2897 upl_cow_again_pages +=
2898 xfer_size >> PAGE_SHIFT;
2899 }
2900 /* remember the copy object we synced with */
2901 last_copy_object = object->copy;
2902 }
2903
2904 dst_page = vm_page_lookup(object, dst_offset);
2905
2906 if(dst_page != VM_PAGE_NULL) {
2907 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2908 !((dst_page->list_req_pending)
2909 && (dst_page->absent))) {
2910 /* we are doing extended range */
2911 /* requests. we want to grab */
2912 /* pages around some which are */
2913 /* already present. */
2914 if(user_page_list) {
2915 user_page_list[entry].phys_addr = 0;
2916 }
2917 entry++;
2918 dst_offset += PAGE_SIZE_64;
2919 xfer_size -= PAGE_SIZE;
2920 continue;
2921 }
2922 if((dst_page->cleaning) &&
2923 !(dst_page->list_req_pending)) {
2924 /*someone else is writing to the */
2925 /* page. We will have to wait. */
2926 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
2927 continue;
2928 }
2929 if ((dst_page->fictitious &&
2930 dst_page->list_req_pending)) {
2931 /* dump the fictitious page */
2932 dst_page->list_req_pending = FALSE;
2933 dst_page->clustered = FALSE;
2934
2935 vm_page_lock_queues();
2936 vm_page_free(dst_page);
2937 vm_page_unlock_queues();
2938
2939 dst_page = NULL;
2940 } else if ((dst_page->absent &&
2941 dst_page->list_req_pending)) {
2942 /* the default_pager case */
2943 dst_page->list_req_pending = FALSE;
2944 dst_page->busy = FALSE;
2945 }
2946 }
2947 if(dst_page == VM_PAGE_NULL) {
2948 if(object->private) {
2949 /*
2950 * This is a nasty wrinkle for users
2951 * of upl who encounter device or
2952 * private memory; however, it is
2953 * unavoidable, since only a fault can
2954 * resolve the actual backing
2955 * physical page by asking the
2956 * backing device.
2957 */
2958 if(user_page_list) {
2959 user_page_list[entry].phys_addr = 0;
2960 }
2961 entry++;
2962 dst_offset += PAGE_SIZE_64;
2963 xfer_size -= PAGE_SIZE;
2964 continue;
2965 }
2966 /* need to allocate a page */
2967 dst_page = vm_page_alloc(object, dst_offset);
2968 if (dst_page == VM_PAGE_NULL) {
2969 vm_object_unlock(object);
2970 VM_PAGE_WAIT();
2971 vm_object_lock(object);
2972 continue;
2973 }
2974 dst_page->busy = FALSE;
2975 #if 0
2976 if(cntrl_flags & UPL_NO_SYNC) {
2977 dst_page->page_lock = 0;
2978 dst_page->unlock_request = 0;
2979 }
2980 #endif
2981 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
2982 /*
2983 * if UPL_RET_ONLY_ABSENT was specified,
2984 * then we're definitely setting up a
2985 * upl for a clustered read/pagein
2986 * operation... mark the pages as clustered
2987 * so vm_fault can correctly attribute them
2988 * to the 'pagein' bucket the first time
2989 * a fault happens on them
2990 */
2991 dst_page->clustered = TRUE;
2992 }
2993 dst_page->absent = TRUE;
2994 object->absent_count++;
2995 }
2996 #if 1
2997 if(cntrl_flags & UPL_NO_SYNC) {
2998 dst_page->page_lock = 0;
2999 dst_page->unlock_request = 0;
3000 }
3001 #endif /* 1 */
3002
3003 /*
3004 * ENCRYPTED SWAP:
3005 */
3006 if (cntrl_flags & UPL_ENCRYPT) {
3007 /*
3008 * The page is going to be encrypted when we
3009 * get it from the pager, so mark it so.
3010 */
3011 dst_page->encrypted = TRUE;
3012 } else {
3013 /*
3014 * Otherwise, the page will not contain
3015 * encrypted data.
3016 */
3017 dst_page->encrypted = FALSE;
3018 }
3019
3020 dst_page->overwriting = TRUE;
3021 if(dst_page->fictitious) {
3022 panic("need corner case for fictitious page");
3023 }
3024 if(dst_page->page_lock) {
3025 do_m_lock = TRUE;
3026 }
3027 if(upl_ptr) {
3028
3029 /* eliminate all mappings from the */
3030 /* original object and its progeny */
3031
3032 if(dst_page->busy) {
3033 /*someone else is playing with the */
3034 /* page. We will have to wait. */
3035 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
3036 continue;
3037 }
3038 vm_page_lock_queues();
3039
3040 if( !(cntrl_flags & UPL_FILE_IO))
3041 hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
3042 else
3043 hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
3044 dirty = hw_dirty ? TRUE : dst_page->dirty;
3045
3046 if(cntrl_flags & UPL_SET_LITE) {
3047 int pg_num;
3048 pg_num = (dst_offset-offset)/PAGE_SIZE;
3049 lite_list[pg_num>>5] |=
3050 1 << (pg_num & 31);
3051 if (hw_dirty)
3052 pmap_clear_modify(dst_page->phys_page);
3053 /*
3054 * Record that this page has been
3055 * written out
3056 */
3057 #if MACH_PAGEMAP
3058 vm_external_state_set(
3059 object->existence_map,
3060 dst_page->offset);
3061 #endif /*MACH_PAGEMAP*/
3062
3063 /*
3064 * Mark original page as cleaning
3065 * in place.
3066 */
3067 dst_page->cleaning = TRUE;
3068 dst_page->dirty = TRUE;
3069 dst_page->precious = FALSE;
3070 } else {
3071 /* use pageclean setup, it is more */
3072 /* convenient even for the pageout */
3073 /* cases here */
3074 vm_object_lock(upl->map_object);
3075 vm_pageclean_setup(dst_page,
3076 alias_page, upl->map_object,
3077 size - xfer_size);
3078 vm_object_unlock(upl->map_object);
3079
3080 alias_page->absent = FALSE;
3081 alias_page = NULL;
3082 }
3083
3084 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
3085 /* clean in place for read implies */
3086 /* that a write will be done on all */
3087 /* the pages that are dirty before */
3088 /* a upl commit is done. The caller */
3089 /* is obligated to preserve the */
3090 /* contents of all pages marked */
3091 /* dirty. */
3092 upl->flags |= UPL_CLEAR_DIRTY;
3093 }
3094
3095 if(!dirty) {
3096 dst_page->dirty = FALSE;
3097 dst_page->precious = TRUE;
3098 }
3099
3100 if (dst_page->wire_count == 0) {
3101 /* deny access to the target page while */
3102 /* it is being worked on */
3103 dst_page->busy = TRUE;
3104 } else {
3105 vm_page_wire(dst_page);
3106 }
3107 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
3108 /*
3109 * expect the page not to be used
3110 * since it's coming in as part
3111 * of a cluster and could be
3112 * speculative... pages that
3113 * are 'consumed' will get a
3114 * hardware reference
3115 */
3116 dst_page->reference = FALSE;
3117 } else {
3118 /*
3119 * expect the page to be used
3120 */
3121 dst_page->reference = TRUE;
3122 }
3123 dst_page->precious =
3124 (cntrl_flags & UPL_PRECIOUS)
3125 ? TRUE : FALSE;
3126 if(user_page_list) {
3127 user_page_list[entry].phys_addr
3128 = dst_page->phys_page;
3129 user_page_list[entry].dirty =
3130 dst_page->dirty;
3131 user_page_list[entry].pageout =
3132 dst_page->pageout;
3133 user_page_list[entry].absent =
3134 dst_page->absent;
3135 user_page_list[entry].precious =
3136 dst_page->precious;
3137 }
3138 vm_page_unlock_queues();
3139 }
3140 entry++;
3141 dst_offset += PAGE_SIZE_64;
3142 xfer_size -= PAGE_SIZE;
3143 }
3144 }
3145
3146 if (upl->flags & UPL_INTERNAL) {
3147 if(page_list_count != NULL)
3148 *page_list_count = 0;
3149 } else if (page_list_count != NULL &&
3150            *page_list_count > entry) {
3151 *page_list_count = entry;
3152 }
3153
3154 if(alias_page != NULL) {
3155 vm_page_lock_queues();
3156 vm_page_free(alias_page);
3157 vm_page_unlock_queues();
3158 }
3159
3160 if(do_m_lock) {
3161 vm_prot_t access_required;
3162 /* call back all associated pages from other users of the pager */
3163 /* all future updates will be on data which is based on the */
3164 /* changes we are going to make here. Note: it is assumed that */
3165 /* we already hold copies of the data so we will not be seeing */
3166 /* an avalanche of incoming data from the pager */
3167 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
3168 ? VM_PROT_READ : VM_PROT_WRITE;
3169 while (TRUE) {
3170 kern_return_t rc;
3171
3172 if(!object->pager_ready) {
3173 wait_result_t wait_result;
3174
3175 wait_result = vm_object_sleep(object,
3176 VM_OBJECT_EVENT_PAGER_READY,
3177 THREAD_UNINT);
3178 if (wait_result != THREAD_AWAKENED) {
3179 vm_object_unlock(object);
3180 return KERN_FAILURE;
3181 }
3182 continue;
3183 }
3184
3185 vm_object_unlock(object);
3186 rc = memory_object_data_unlock(
3187 object->pager,
3188 dst_offset + object->paging_offset,
3189 size,
3190 access_required);
3191 if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
3192 return KERN_FAILURE;
3193 vm_object_lock(object);
3194
3195 if (rc == KERN_SUCCESS)
3196 break;
3197 }
3198
3199 /* let's wait on the last page requested */
3200 /* NOTE: we will have to update lock completed routine to signal */
3201 if(dst_page != VM_PAGE_NULL &&
3202 (access_required & dst_page->page_lock) != access_required) {
3203 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
3204 vm_object_unlock(object);
3205 thread_block(THREAD_CONTINUE_NULL);
3206 return KERN_SUCCESS;
3207 }
3208 }
3209
3210 vm_object_unlock(object);
3211 return KERN_SUCCESS;
3212 }
3213
3214 /* JMM - Backward compatibility for now */
3215 kern_return_t
3216 vm_fault_list_request( /* forward */
3217 memory_object_control_t control,
3218 vm_object_offset_t offset,
3219 upl_size_t size,
3220 upl_t *upl_ptr,
3221 upl_page_info_t **user_page_list_ptr,
3222 int page_list_count,
3223 int cntrl_flags);
3224 kern_return_t
3225 vm_fault_list_request(
3226 memory_object_control_t control,
3227 vm_object_offset_t offset,
3228 upl_size_t size,
3229 upl_t *upl_ptr,
3230 upl_page_info_t **user_page_list_ptr,
3231 int page_list_count,
3232 int cntrl_flags)
3233 {
3234 int local_list_count;
3235 upl_page_info_t *user_page_list;
3236 kern_return_t kr;
3237
3238 if (user_page_list_ptr != NULL) {
3239 local_list_count = page_list_count;
3240 user_page_list = *user_page_list_ptr;
3241 } else {
3242 local_list_count = 0;
3243 user_page_list = NULL;
3244 }
3245 kr = memory_object_upl_request(control,
3246 offset,
3247 size,
3248 upl_ptr,
3249 user_page_list,
3250 &local_list_count,
3251 cntrl_flags);
3252
3253 if(kr != KERN_SUCCESS)
3254 return kr;
3255
3256 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3257 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3258 }
3259
3260 return KERN_SUCCESS;
3261 }
3262
3263
3264
3265 /*
3266 * Routine: vm_object_super_upl_request
3267 * Purpose:
3268 * Cause the population of a portion of a vm_object
3269 * in much the same way as memory_object_upl_request.
3270 * Depending on the nature of the request, the pages
3271 * returned may contain valid data or be uninitialized.
3272 * However, the region may be expanded up to the super
3273 * cluster size provided.
3274 */
3275
3276 __private_extern__ kern_return_t
3277 vm_object_super_upl_request(
3278 vm_object_t object,
3279 vm_object_offset_t offset,
3280 upl_size_t size,
3281 upl_size_t super_cluster,
3282 upl_t *upl,
3283 upl_page_info_t *user_page_list,
3284 unsigned int *page_list_count,
3285 int cntrl_flags)
3286 {
3287 vm_page_t target_page;
3288 int ticket;
3289
3290
3291 if(object->paging_offset > offset)
3292 return KERN_FAILURE;
3293
3294 assert(object->paging_in_progress);
3295 offset = offset - object->paging_offset;
3296
3297 if(cntrl_flags & UPL_FOR_PAGEOUT) {
3298
3299 vm_object_lock(object);
3300
3301 if((target_page = vm_page_lookup(object, offset))
3302 != VM_PAGE_NULL) {
3303 ticket = target_page->page_ticket;
3304 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
3305 cntrl_flags = cntrl_flags |
3306 ((ticket << UPL_PAGE_TICKET_SHIFT)
3307 & UPL_PAGE_TICKET_MASK);
3308 }
3309 vm_object_unlock(object);
3310 }
3311
3312 if (super_cluster > size) {
3313
3314 vm_object_offset_t base_offset;
3315 upl_size_t super_size;
3316
3317 base_offset = (offset &
3318 ~((vm_object_offset_t) super_cluster - 1));
3319 super_size = (offset+size) > (base_offset + super_cluster) ?
3320 super_cluster<<1 : super_cluster;
3321 super_size = ((base_offset + super_size) > object->size) ?
3322 (object->size - base_offset) : super_size;
3323 if(offset > (base_offset + super_size))
3324 panic("vm_object_super_upl_request: Missed target pageout"
3325 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3326 offset, base_offset, super_size, super_cluster,
3327 size, object->paging_offset);
3328 /*
3329 * apparently there is a case where the vm requests a
3330 * page to be written out whose offset is beyond the
3331 * object size
3332 */
3333 if((offset + size) > (base_offset + super_size))
3334 super_size = (offset + size) - base_offset;
3335
3336 offset = base_offset;
3337 size = super_size;
3338 }
3339 return vm_object_upl_request(object, offset, size,
3340 upl, user_page_list, page_list_count,
3341 cntrl_flags);
3342 }
3343
3344
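/*
 *	vm_map_create_upl:
 *
 *	Build a UPL for a range of a vm_map.  The map entry covering
 *	"offset" is located (recursing into submaps and forcing a
 *	copy-on-write resolution or data sync where required), the
 *	requested size is clipped to the entry and to MAX_UPL_TRANSFER,
 *	and the work is then handed to vm_object_iopl_request() or
 *	vm_object_upl_request() on the backing object.
 */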
3345 kern_return_t
3346 vm_map_create_upl(
3347 vm_map_t map,
3348 vm_map_address_t offset,
3349 upl_size_t *upl_size,
3350 upl_t *upl,
3351 upl_page_info_array_t page_list,
3352 unsigned int *count,
3353 int *flags)
3354 {
3355 vm_map_entry_t entry;
3356 int caller_flags;
3357 int force_data_sync;
3358 int sync_cow_data;
3359 vm_object_t local_object;
3360 vm_map_offset_t local_offset;
3361 vm_map_offset_t local_start;
3362 kern_return_t ret;
3363
3364 caller_flags = *flags;
3365
3366 if (caller_flags & ~UPL_VALID_FLAGS) {
3367 /*
3368 * For forward compatibility's sake,
3369 * reject any unknown flag.
3370 */
3371 return KERN_INVALID_VALUE;
3372 }
3373
3374 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3375 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3376
3377 if(upl == NULL)
3378 return KERN_INVALID_ARGUMENT;
3379
3380
3381 REDISCOVER_ENTRY:
3382 vm_map_lock(map);
3383 if (vm_map_lookup_entry(map, offset, &entry)) {
3384 if (entry->object.vm_object == VM_OBJECT_NULL ||
3385 !entry->object.vm_object->phys_contiguous) {
3386 if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
3387 *upl_size = MAX_UPL_TRANSFER * page_size;
3388 }
3389 }
3390 if((entry->vme_end - offset) < *upl_size) {
3391 *upl_size = entry->vme_end - offset;
3392 }
3393 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3394 if (entry->object.vm_object == VM_OBJECT_NULL) {
3395 *flags = 0;
3396 } else if (entry->object.vm_object->private) {
3397 *flags = UPL_DEV_MEMORY;
3398 if (entry->object.vm_object->phys_contiguous) {
3399 *flags |= UPL_PHYS_CONTIG;
3400 }
3401 } else {
3402 *flags = 0;
3403 }
3404 vm_map_unlock(map);
3405 return KERN_SUCCESS;
3406 }
3407 /*
3408 * Create an object if necessary.
3409 */
3410 if (entry->object.vm_object == VM_OBJECT_NULL) {
3411 entry->object.vm_object = vm_object_allocate(
3412 (vm_size_t)(entry->vme_end - entry->vme_start));
3413 entry->offset = 0;
3414 }
3415 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3416 if (!(entry->protection & VM_PROT_WRITE)) {
3417 vm_map_unlock(map);
3418 return KERN_PROTECTION_FAILURE;
3419 }
3420 if (entry->needs_copy) {
3421 vm_map_t local_map;
3422 vm_object_t object;
3423 vm_map_offset_t offset_hi;
3424 vm_map_offset_t offset_lo;
3425 vm_object_offset_t new_offset;
3426 vm_prot_t prot;
3427 boolean_t wired;
3428 vm_behavior_t behavior;
3429 vm_map_version_t version;
3430 vm_map_t real_map;
3431
3432 local_map = map;
3433 vm_map_lock_write_to_read(map);
3434 if(vm_map_lookup_locked(&local_map,
3435 offset, VM_PROT_WRITE,
3436 &version, &object,
3437 &new_offset, &prot, &wired,
3438 &behavior, &offset_lo,
3439 &offset_hi, &real_map)) {
3440 vm_map_unlock(local_map);
3441 return KERN_FAILURE;
3442 }
3443 if (real_map != map) {
3444 vm_map_unlock(real_map);
3445 }
3446 vm_object_unlock(object);
3447 vm_map_unlock(local_map);
3448
3449 goto REDISCOVER_ENTRY;
3450 }
3451 }
3452 if (entry->is_sub_map) {
3453 vm_map_t submap;
3454
3455 submap = entry->object.sub_map;
3456 local_start = entry->vme_start;
3457 local_offset = entry->offset;
3458 vm_map_reference(submap);
3459 vm_map_unlock(map);
3460
3461 ret = (vm_map_create_upl(submap,
3462 local_offset + (offset - local_start),
3463 upl_size, upl, page_list, count,
3464 flags));
3465
3466 vm_map_deallocate(submap);
3467 return ret;
3468 }
3469
3470 if (sync_cow_data) {
3471 if (entry->object.vm_object->shadow
3472 || entry->object.vm_object->copy) {
3473
3474 local_object = entry->object.vm_object;
3475 local_start = entry->vme_start;
3476 local_offset = entry->offset;
3477 vm_object_reference(local_object);
3478 vm_map_unlock(map);
3479
3480 if (entry->object.vm_object->shadow &&
3481 entry->object.vm_object->copy) {
3482 vm_object_lock_request(
3483 local_object->shadow,
3484 (vm_object_offset_t)
3485 ((offset - local_start) +
3486 local_offset) +
3487 local_object->shadow_offset,
3488 *upl_size, FALSE,
3489 MEMORY_OBJECT_DATA_SYNC,
3490 VM_PROT_NO_CHANGE);
3491 }
3492 sync_cow_data = FALSE;
3493 vm_object_deallocate(local_object);
3494 goto REDISCOVER_ENTRY;
3495 }
3496 }
3497
3498 if (force_data_sync) {
3499
3500 local_object = entry->object.vm_object;
3501 local_start = entry->vme_start;
3502 local_offset = entry->offset;
3503 vm_object_reference(local_object);
3504 vm_map_unlock(map);
3505
3506 vm_object_lock_request(
3507 local_object,
3508 (vm_object_offset_t)
3509 ((offset - local_start) + local_offset),
3510 (vm_object_size_t)*upl_size, FALSE,
3511 MEMORY_OBJECT_DATA_SYNC,
3512 VM_PROT_NO_CHANGE);
3513 force_data_sync = FALSE;
3514 vm_object_deallocate(local_object);
3515 goto REDISCOVER_ENTRY;
3516 }
3517
3518 if(!(entry->object.vm_object->private)) {
3519 if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3520 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3521 if(entry->object.vm_object->phys_contiguous) {
3522 *flags = UPL_PHYS_CONTIG;
3523 } else {
3524 *flags = 0;
3525 }
3526 } else {
3527 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3528 }
3529 local_object = entry->object.vm_object;
3530 local_offset = entry->offset;
3531 local_start = entry->vme_start;
3532 vm_object_reference(local_object);
3533 vm_map_unlock(map);
3534 if(caller_flags & UPL_SET_IO_WIRE) {
3535 ret = (vm_object_iopl_request(local_object,
3536 (vm_object_offset_t)
3537 ((offset - local_start)
3538 + local_offset),
3539 *upl_size,
3540 upl,
3541 page_list,
3542 count,
3543 caller_flags));
3544 } else {
3545 ret = (vm_object_upl_request(local_object,
3546 (vm_object_offset_t)
3547 ((offset - local_start)
3548 + local_offset),
3549 *upl_size,
3550 upl,
3551 page_list,
3552 count,
3553 caller_flags));
3554 }
3555 vm_object_deallocate(local_object);
3556 return(ret);
3557 }
3558
3559 vm_map_unlock(map);
3560 return(KERN_FAILURE);
3561
3562 }
3563
3564 /*
3565 * Internal routine to enter a UPL into a VM map.
3566 *
3567 * JMM - This should just be doable through the standard
3568 * vm_map_enter() API.
3569 */
3570 kern_return_t
3571 vm_map_enter_upl(
3572 vm_map_t map,
3573 upl_t upl,
3574 vm_map_offset_t *dst_addr)
3575 {
3576 vm_map_size_t size;
3577 vm_object_offset_t offset;
3578 vm_map_offset_t addr;
3579 vm_page_t m;
3580 kern_return_t kr;
3581
3582 if (upl == UPL_NULL)
3583 return KERN_INVALID_ARGUMENT;
3584
3585 upl_lock(upl);
3586
3587 /* check to see if already mapped */
3588 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3589 upl_unlock(upl);
3590 return KERN_FAILURE;
3591 }
3592
3593 if((!(upl->map_object->pageout)) &&
3594 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3595 (upl->map_object->phys_contiguous))) {
3596 vm_object_t object;
3597 vm_page_t alias_page;
3598 vm_object_offset_t new_offset;
3599 int pg_num;
3600 wpl_array_t lite_list;
3601
3602 if(upl->flags & UPL_INTERNAL) {
3603 lite_list = (wpl_array_t)
3604 ((((uintptr_t)upl) + sizeof(struct upl))
3605 + ((upl->size/PAGE_SIZE)
3606 * sizeof(upl_page_info_t)));
3607 } else {
3608 lite_list = (wpl_array_t)
3609 (((uintptr_t)upl) + sizeof(struct upl));
3610 }
3611 object = upl->map_object;
3612 upl->map_object = vm_object_allocate(upl->size);
3613 vm_object_lock(upl->map_object);
3614 upl->map_object->shadow = object;
3615 upl->map_object->pageout = TRUE;
3616 upl->map_object->can_persist = FALSE;
3617 upl->map_object->copy_strategy =
3618 MEMORY_OBJECT_COPY_NONE;
3619 upl->map_object->shadow_offset =
3620 upl->offset - object->paging_offset;
3621 upl->map_object->wimg_bits = object->wimg_bits;
3622 offset = upl->map_object->shadow_offset;
3623 new_offset = 0;
3624 size = upl->size;
3625
3626 vm_object_lock(object);
3627
3628 while(size) {
3629 pg_num = (new_offset)/PAGE_SIZE;
3630 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3631 vm_object_unlock(object);
3632 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3633 vm_object_lock(object);
3634 m = vm_page_lookup(object, offset);
3635 if (m == VM_PAGE_NULL) {
3636 panic("vm_upl_map: page missing\n");
3637 }
3638
3639 vm_object_paging_begin(object);
3640
3641 /*
3642 * Convert the fictitious page to a private
3643 * shadow of the real page.
3644 */
3645 assert(alias_page->fictitious);
3646 alias_page->fictitious = FALSE;
3647 alias_page->private = TRUE;
3648 alias_page->pageout = TRUE;
3649 alias_page->phys_page = m->phys_page;
3650
3651 vm_page_lock_queues();
3652 vm_page_wire(alias_page);
3653 vm_page_unlock_queues();
3654
3655 /*
3656 * ENCRYPTED SWAP:
3657 * The virtual page ("m") has to be wired in some way
3658 * here or its physical page ("m->phys_page") could
3659 * be recycled at any time.
3660 * Assuming this is enforced by the caller, we can't
3661 * get an encrypted page here. Since the encryption
3662 * key depends on the VM page's "pager" object and
3663 * the "paging_offset", we couldn't handle 2 pageable
3664 * VM pages (with different pagers and paging_offsets)
3665 * sharing the same physical page: we could end up
3666 * encrypting with one key (via one VM page) and
3667 * decrypting with another key (via the alias VM page).
3668 */
3669 ASSERT_PAGE_DECRYPTED(m);
3670
3671 vm_page_insert(alias_page,
3672 upl->map_object, new_offset);
3673 assert(!alias_page->wanted);
3674 alias_page->busy = FALSE;
3675 alias_page->absent = FALSE;
3676 }
3677
3678 size -= PAGE_SIZE;
3679 offset += PAGE_SIZE_64;
3680 new_offset += PAGE_SIZE_64;
3681 }
3682 vm_object_unlock(object);
3683 vm_object_unlock(upl->map_object);
3684 }
3685 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3686 offset = upl->offset - upl->map_object->paging_offset;
3687 else
3688 offset = 0;
3689
3690 size = upl->size;
3691
3692 vm_object_lock(upl->map_object);
3693 upl->map_object->ref_count++;
3694 vm_object_res_reference(upl->map_object);
3695 vm_object_unlock(upl->map_object);
3696
3697 *dst_addr = 0;
3698
3699
3700 /* NEED A UPL_MAP ALIAS */
3701 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3702 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
3703 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3704
3705 if (kr != KERN_SUCCESS) {
3706 upl_unlock(upl);
3707 return(kr);
3708 }
3709
3710 vm_object_lock(upl->map_object);
3711
3712 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3713 m = vm_page_lookup(upl->map_object, offset);
3714 if(m) {
3715 unsigned int cache_attr;
3716 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3717
3718 PMAP_ENTER(map->pmap, addr,
3719 m, VM_PROT_ALL,
3720 cache_attr, TRUE);
3721 }
3722 offset+=PAGE_SIZE_64;
3723 }
3724 vm_object_unlock(upl->map_object);
3725
3726 upl->ref_count++; /* hold a reference for the mapping */
3727 upl->flags |= UPL_PAGE_LIST_MAPPED;
3728 upl->kaddr = *dst_addr;
3729 upl_unlock(upl);
3730 return KERN_SUCCESS;
3731 }
3732
3733 /*
3734 * Internal routine to remove a UPL mapping from a VM map.
3735 *
3736 * XXX - This should just be doable through a standard
3737 * vm_map_remove() operation. Otherwise, implicit clean-up
3738 * of the target map won't be able to correctly remove
3739 * these (and release the reference on the UPL). Having
3740 * to do this means we can't map these into user-space
3741 * maps yet.
3742 */
3743 kern_return_t
3744 vm_map_remove_upl(
3745 vm_map_t map,
3746 upl_t upl)
3747 {
3748 vm_address_t addr;
3749 upl_size_t size;
3750
3751 if (upl == UPL_NULL)
3752 return KERN_INVALID_ARGUMENT;
3753
3754 upl_lock(upl);
3755 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
3756 addr = upl->kaddr;
3757 size = upl->size;
3758 assert(upl->ref_count > 1);
3759 upl->ref_count--; /* removing mapping ref */
3760 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3761 upl->kaddr = (vm_offset_t) 0;
3762 upl_unlock(upl);
3763
3764 vm_map_remove( map,
3765 vm_map_trunc_page(addr),
3766 vm_map_round_page(addr + size),
3767 VM_MAP_NO_FLAGS);
3768 return KERN_SUCCESS;
3769 }
3770 upl_unlock(upl);
3771 return KERN_FAILURE;
3772 }
3773
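/*
 *	upl_commit_range:
 *
 *	Commit a sub-range of a UPL back to the VM system.  Each page
 *	covered by the range has its lite-list bit cleared and its
 *	busy/cleaning/pageout state resolved: wired I/O pages are
 *	unwired, pageout pages are freed or reactivated depending on
 *	whether they were re-dirtied, and the reference and dirty bits
 *	are adjusted as requested by the commit flags.
 */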
3774 kern_return_t
3775 upl_commit_range(
3776 upl_t upl,
3777 upl_offset_t offset,
3778 upl_size_t size,
3779 int flags,
3780 upl_page_info_t *page_list,
3781 mach_msg_type_number_t count,
3782 boolean_t *empty)
3783 {
3784 upl_size_t xfer_size = size;
3785 vm_object_t shadow_object;
3786 vm_object_t object = upl->map_object;
3787 vm_object_offset_t target_offset;
3788 int entry;
3789 wpl_array_t lite_list;
3790 int occupied;
3791 int delayed_unlock = 0;
3792 int clear_refmod = 0;
3793 boolean_t shadow_internal;
3794
3795 *empty = FALSE;
3796
3797 if (upl == UPL_NULL)
3798 return KERN_INVALID_ARGUMENT;
3799
3800
3801 if (count == 0)
3802 page_list = NULL;
3803
3804 if (object->pageout) {
3805 shadow_object = object->shadow;
3806 } else {
3807 shadow_object = object;
3808 }
3809
3810 upl_lock(upl);
3811
3812 if (upl->flags & UPL_ACCESS_BLOCKED) {
3813 /*
3814 * We used this UPL to block access to the pages by marking
3815 * them "busy". Now we need to clear the "busy" bit to allow
3816 * access to these pages again.
3817 */
3818 flags |= UPL_COMMIT_ALLOW_ACCESS;
3819 }
3820
3821 if (upl->flags & UPL_CLEAR_DIRTY)
3822 flags |= UPL_COMMIT_CLEAR_DIRTY;
3823
3824 if (upl->flags & UPL_DEVICE_MEMORY) {
3825 xfer_size = 0;
3826 } else if ((offset + size) > upl->size) {
3827 upl_unlock(upl);
3828 return KERN_FAILURE;
3829 }
3830
3831 if (upl->flags & UPL_INTERNAL) {
3832 lite_list = (wpl_array_t)
3833 ((((uintptr_t)upl) + sizeof(struct upl))
3834 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3835 } else {
3836 lite_list = (wpl_array_t)
3837 (((uintptr_t)upl) + sizeof(struct upl));
3838 }
3839 if (object != shadow_object)
3840 vm_object_lock(object);
3841 vm_object_lock(shadow_object);
3842
3843 shadow_internal = shadow_object->internal;
3844
3845 entry = offset/PAGE_SIZE;
3846 target_offset = (vm_object_offset_t)offset;
3847
3848 while (xfer_size) {
3849 vm_page_t t,m;
3850 upl_page_info_t *p;
3851
3852 m = VM_PAGE_NULL;
3853
3854 if (upl->flags & UPL_LITE) {
3855 int pg_num;
3856
3857 pg_num = target_offset/PAGE_SIZE;
3858
3859 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3860 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3861 m = vm_page_lookup(shadow_object,
3862 target_offset + (upl->offset -
3863 shadow_object->paging_offset));
3864 }
3865 }
3866 if (object->pageout) {
3867 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3868 t->pageout = FALSE;
3869
3870 if (delayed_unlock) {
3871 delayed_unlock = 0;
3872 vm_page_unlock_queues();
3873 }
3874 VM_PAGE_FREE(t);
3875
3876 if (m == NULL) {
3877 m = vm_page_lookup(
3878 shadow_object,
3879 target_offset +
3880 object->shadow_offset);
3881 }
3882 if (m != VM_PAGE_NULL)
3883 vm_object_paging_end(m->object);
3884 }
3885 }
3886 if (m != VM_PAGE_NULL) {
3887
3888 clear_refmod = 0;
3889
3890 if (upl->flags & UPL_IO_WIRE) {
3891
3892 if (delayed_unlock == 0)
3893 vm_page_lock_queues();
3894
3895 vm_page_unwire(m);
3896
3897 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3898 delayed_unlock = 0;
3899 vm_page_unlock_queues();
3900 }
3901 if (page_list) {
3902 page_list[entry].phys_addr = 0;
3903 }
3904 if (flags & UPL_COMMIT_SET_DIRTY) {
3905 m->dirty = TRUE;
3906 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3907 m->dirty = FALSE;
3908 clear_refmod |= VM_MEM_MODIFIED;
3909 }
3910 if (flags & UPL_COMMIT_INACTIVATE) {
3911 m->reference = FALSE;
3912 clear_refmod |= VM_MEM_REFERENCED;
3913 vm_page_deactivate(m);
3914 }
3915 if (clear_refmod)
3916 pmap_clear_refmod(m->phys_page, clear_refmod);
3917
3918 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
3919 /*
3920 * We blocked access to the pages in this UPL.
3921 * Clear the "busy" bit and wake up any waiter
3922 * for this page.
3923 */
3924 PAGE_WAKEUP_DONE(m);
3925 }
3926
3927 target_offset += PAGE_SIZE_64;
3928 xfer_size -= PAGE_SIZE;
3929 entry++;
3930 continue;
3931 }
3932 if (delayed_unlock == 0)
3933 vm_page_lock_queues();
3934 /*
3935 * make sure to clear the hardware
3936 * modify or reference bits before
3937 * releasing the BUSY bit on this page
3938 * otherwise we risk losing a legitimate
3939 * change of state
3940 */
3941 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3942 m->dirty = FALSE;
3943 clear_refmod |= VM_MEM_MODIFIED;
3944 }
3945 if (flags & UPL_COMMIT_INACTIVATE)
3946 clear_refmod |= VM_MEM_REFERENCED;
3947
3948 if (clear_refmod)
3949 pmap_clear_refmod(m->phys_page, clear_refmod);
3950
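/*
 * Reconcile the caller's page list with the page's state: if the
 * caller newly marked this page for pageout, mark it busy and wire
 * it; if the caller dropped the pageout intent on a page we had set
 * up for pageout, undo that and wake any waiters.
 */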
3951 if (page_list) {
3952 p = &(page_list[entry]);
3953 if(p->phys_addr && p->pageout && !m->pageout) {
3954 m->busy = TRUE;
3955 m->pageout = TRUE;
3956 vm_page_wire(m);
3957 } else if (page_list[entry].phys_addr &&
3958 !p->pageout && m->pageout &&
3959 !m->dump_cleaning) {
3960 m->pageout = FALSE;
3961 m->absent = FALSE;
3962 m->overwriting = FALSE;
3963 vm_page_unwire(m);
3964 PAGE_WAKEUP_DONE(m);
3965 }
3966 page_list[entry].phys_addr = 0;
3967 }
3968 m->dump_cleaning = FALSE;
3969 if(m->laundry) {
3970 vm_pageout_throttle_up(m);
3971 }
3972 if(m->pageout) {
3973 m->cleaning = FALSE;
3974 m->pageout = FALSE;
3975 #if MACH_CLUSTER_STATS
3976 if (m->wanted) vm_pageout_target_collisions++;
3977 #endif
3978 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
3979 m->dirty = TRUE;
3980 else
3981 m->dirty = FALSE;
3982
3983 if(m->dirty) {
3984 vm_page_unwire(m);/* reactivates */
3985
3986 if (upl->flags & UPL_PAGEOUT) {
3987 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
3988 VM_STAT(reactivations++);
3989 }
3990 PAGE_WAKEUP_DONE(m);
3991 } else {
3992 vm_page_free(m);/* clears busy, etc. */
3993
3994 if (upl->flags & UPL_PAGEOUT) {
3995 CLUSTER_STAT(vm_pageout_target_page_freed++;)
3996
3997 if (page_list[entry].dirty)
3998 VM_STAT(pageouts++);
3999 }
4000 }
4001 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4002 delayed_unlock = 0;
4003 vm_page_unlock_queues();
4004 }
4005 target_offset += PAGE_SIZE_64;
4006 xfer_size -= PAGE_SIZE;
4007 entry++;
4008 continue;
4009 }
4010 #if MACH_CLUSTER_STATS
4011 m->dirty = pmap_is_modified(m->phys_page);
4012
4013 if (m->dirty) vm_pageout_cluster_dirtied++;
4014 else vm_pageout_cluster_cleaned++;
4015 if (m->wanted) vm_pageout_cluster_collisions++;
4016 #else
4017 m->dirty = 0;
4018 #endif
4019
4020 if((m->busy) && (m->cleaning)) {
4021 /* the request_page_list case */
4022 if(m->absent) {
4023 m->absent = FALSE;
4024 if(shadow_object->absent_count == 1)
4025 vm_object_absent_release(shadow_object);
4026 else
4027 shadow_object->absent_count--;
4028 }
4029 m->overwriting = FALSE;
4030 m->busy = FALSE;
4031 m->dirty = FALSE;
4032 } else if (m->overwriting) {
4033 /* alternate request page list, write to
4034 * page_list case. Occurs when the original
4035 * page was wired at the time of the list
4036 * request */
4037 assert(m->wire_count != 0);
4038 vm_page_unwire(m);/* reactivates */
4039 m->overwriting = FALSE;
4040 }
4041 m->cleaning = FALSE;
4042
4043 /*
4044  * It is part of the semantics of COPYOUT_FROM UPLs that a commit
4045  * implies a cache sync between the vm page and the backing store;
4046  * this can be used to strip the precious bit as well as to clean.
4047  */
4048 if (upl->flags & UPL_PAGE_SYNC_DONE)
4049 m->precious = FALSE;
4050
4051 if (flags & UPL_COMMIT_SET_DIRTY)
4052 m->dirty = TRUE;
4053
4054 if (flags & UPL_COMMIT_INACTIVATE) {
4055 m->reference = FALSE;
4056 vm_page_deactivate(m);
4057 } else if (!m->active && !m->inactive) {
4058 if (m->reference)
4059 vm_page_activate(m);
4060 else
4061 vm_page_deactivate(m);
4062 }
4063
4064 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4065 /*
4066 * We blocked access to the pages in this UPL.
4067 * Clear the "busy" bit on this page before we
4068 * wake up any waiter.
4069 */
4070 m->busy = FALSE;
4071 }
4072
4073 /*
4074 * Wake up any thread waiting for this page to finish cleaning.
4075 */
4076 PAGE_WAKEUP(m);
4077
4078 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4079 delayed_unlock = 0;
4080 vm_page_unlock_queues();
4081 }
4082 }
4083 target_offset += PAGE_SIZE_64;
4084 xfer_size -= PAGE_SIZE;
4085 entry++;
4086 }
4087 if (delayed_unlock)
4088 vm_page_unlock_queues();
4089
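/*
 * Decide whether the UPL still has pages outstanding: device memory
 * never does, a lite UPL is empty once its bitmap is all clear, and
 * a shadow UPL is empty once its map object's page queue is empty.
 */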
4090 occupied = 1;
4091
4092 if (upl->flags & UPL_DEVICE_MEMORY) {
4093 occupied = 0;
4094 } else if (upl->flags & UPL_LITE) {
4095 int pg_num;
4096 int i;
4097 pg_num = upl->size/PAGE_SIZE;
4098 pg_num = (pg_num + 31) >> 5;
4099 occupied = 0;
4100 for(i= 0; i<pg_num; i++) {
4101 if(lite_list[i] != 0) {
4102 occupied = 1;
4103 break;
4104 }
4105 }
4106 } else {
4107 if(queue_empty(&upl->map_object->memq)) {
4108 occupied = 0;
4109 }
4110 }
4111
4112 if(occupied == 0) {
4113 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4114 *empty = TRUE;
4115 }
4116 if(object == shadow_object)
4117 vm_object_paging_end(shadow_object);
4118 }
4119 vm_object_unlock(shadow_object);
4120 if (object != shadow_object)
4121 vm_object_unlock(object);
4122 upl_unlock(upl);
4123
4124 return KERN_SUCCESS;
4125 }
4126
4127 kern_return_t
4128 upl_abort_range(
4129 upl_t upl,
4130 upl_offset_t offset,
4131 upl_size_t size,
4132 int error,
4133 boolean_t *empty)
4134 {
4135 upl_size_t xfer_size = size;
4136 vm_object_t shadow_object;
4137 vm_object_t object = upl->map_object;
4138 vm_object_offset_t target_offset;
4139 int entry;
4140 wpl_array_t lite_list;
4141 int occupied;
4142 boolean_t shadow_internal;
4143
4144 *empty = FALSE;
4145
4146 if (upl == UPL_NULL)
4147 return KERN_INVALID_ARGUMENT;
4148
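/*
 * For I/O-wired UPLs an abort is handled as a commit: the pages just
 * need to be unwired, and the error argument is ignored.
 */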
4149 if (upl->flags & UPL_IO_WIRE) {
4150 return upl_commit_range(upl,
4151 offset, size, 0,
4152 NULL, 0, empty);
4153 }
4154
4155 if(object->pageout) {
4156 shadow_object = object->shadow;
4157 } else {
4158 shadow_object = object;
4159 }
4160
4161 upl_lock(upl);
4162 if(upl->flags & UPL_DEVICE_MEMORY) {
4163 xfer_size = 0;
4164 } else if ((offset + size) > upl->size) {
4165 upl_unlock(upl);
4166 return KERN_FAILURE;
4167 }
4168 if (object != shadow_object)
4169 vm_object_lock(object);
4170 vm_object_lock(shadow_object);
4171
4172 shadow_internal = shadow_object->internal;
4173
4174 if(upl->flags & UPL_INTERNAL) {
4175 lite_list = (wpl_array_t)
4176 ((((uintptr_t)upl) + sizeof(struct upl))
4177 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4178 } else {
4179 lite_list = (wpl_array_t)
4180 (((uintptr_t)upl) + sizeof(struct upl));
4181 }
4182
4183 entry = offset/PAGE_SIZE;
4184 target_offset = (vm_object_offset_t)offset;
4185 while(xfer_size) {
4186 vm_page_t t,m;
4187
4188 m = VM_PAGE_NULL;
4189 if(upl->flags & UPL_LITE) {
4190 int pg_num;
4191 pg_num = target_offset/PAGE_SIZE;
4192 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4193 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4194 m = vm_page_lookup(shadow_object,
4195 target_offset + (upl->offset -
4196 shadow_object->paging_offset));
4197 }
4198 }
4199 if(object->pageout) {
4200 if ((t = vm_page_lookup(object, target_offset))
4201 != NULL) {
4202 t->pageout = FALSE;
4203 VM_PAGE_FREE(t);
4204 if(m == NULL) {
4205 m = vm_page_lookup(
4206 shadow_object,
4207 target_offset +
4208 object->shadow_offset);
4209 }
4210 if(m != VM_PAGE_NULL)
4211 vm_object_paging_end(m->object);
4212 }
4213 }
4214 if(m != VM_PAGE_NULL) {
4215 vm_page_lock_queues();
4216 if(m->absent) {
4217 boolean_t must_free = TRUE;
4218
4219 /* COPYOUT = FALSE case */
4220 /* check for error conditions which must */
4221 /* be passed back to the page's customer */
4222 if(error & UPL_ABORT_RESTART) {
4223 m->restart = TRUE;
4224 m->absent = FALSE;
4225 vm_object_absent_release(m->object);
4226 m->page_error = KERN_MEMORY_ERROR;
4227 m->error = TRUE;
4228 must_free = FALSE;
4229 } else if(error & UPL_ABORT_UNAVAILABLE) {
4230 m->restart = FALSE;
4231 m->unusual = TRUE;
4232 must_free = FALSE;
4233 } else if(error & UPL_ABORT_ERROR) {
4234 m->restart = FALSE;
4235 m->absent = FALSE;
4236 vm_object_absent_release(m->object);
4237 m->page_error = KERN_MEMORY_ERROR;
4238 m->error = TRUE;
4239 must_free = FALSE;
4240 }
4241
4242 /*
4243 * ENCRYPTED SWAP:
4244 * If the page was already encrypted,
4245 * we don't really need to decrypt it
4246 * now. It will get decrypted later,
4247 * on demand, as soon as someone needs
4248 * to access its contents.
4249 */
4250
4251 m->cleaning = FALSE;
4252 m->overwriting = FALSE;
4253 PAGE_WAKEUP_DONE(m);
4254
4255 if (must_free == TRUE) {
4256 vm_page_free(m);
4257 } else {
4258 vm_page_activate(m);
4259 }
4260 vm_page_unlock_queues();
4261
4262 target_offset += PAGE_SIZE_64;
4263 xfer_size -= PAGE_SIZE;
4264 entry++;
4265 continue;
4266 }
4267 /*
4268 * Handle the trusted pager throttle.
4269 */
4270 if (m->laundry) {
4271 vm_pageout_throttle_up(m);
4272 }
4273 if(m->pageout) {
4274 assert(m->busy);
4275 assert(m->wire_count == 1);
4276 m->pageout = FALSE;
4277 vm_page_unwire(m);
4278 }
4279 m->dump_cleaning = FALSE;
4280 m->cleaning = FALSE;
4281 m->overwriting = FALSE;
4282 #if MACH_PAGEMAP
4283 vm_external_state_clr(
4284 m->object->existence_map, m->offset);
4285 #endif /* MACH_PAGEMAP */
4286 if(error & UPL_ABORT_DUMP_PAGES) {
4287 vm_page_free(m);
4288 pmap_disconnect(m->phys_page);
4289 } else {
4290 PAGE_WAKEUP_DONE(m);
4291 }
4292 vm_page_unlock_queues();
4293 }
4294 target_offset += PAGE_SIZE_64;
4295 xfer_size -= PAGE_SIZE;
4296 entry++;
4297 }
4298 occupied = 1;
4299 if (upl->flags & UPL_DEVICE_MEMORY) {
4300 occupied = 0;
4301 } else if (upl->flags & UPL_LITE) {
4302 int pg_num;
4303 int i;
4304 pg_num = upl->size/PAGE_SIZE;
4305 pg_num = (pg_num + 31) >> 5;
4306 occupied = 0;
4307 for(i= 0; i<pg_num; i++) {
4308 if(lite_list[i] != 0) {
4309 occupied = 1;
4310 break;
4311 }
4312 }
4313 } else {
4314 if(queue_empty(&upl->map_object->memq)) {
4315 occupied = 0;
4316 }
4317 }
4318
4319 if(occupied == 0) {
4320 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
4321 *empty = TRUE;
4322 }
4323 if(object == shadow_object)
4324 vm_object_paging_end(shadow_object);
4325 }
4326 vm_object_unlock(shadow_object);
4327 if (object != shadow_object)
4328 vm_object_unlock(object);
4329
4330 upl_unlock(upl);
4331
4332 return KERN_SUCCESS;
4333 }
4334
4335 kern_return_t
4336 upl_abort(
4337 upl_t upl,
4338 int error)
4339 {
4340 vm_object_t object = NULL;
4341 vm_object_t shadow_object = NULL;
4342 vm_object_offset_t offset;
4343 vm_object_offset_t shadow_offset;
4344 vm_object_offset_t target_offset;
4345 upl_size_t i;
4346 wpl_array_t lite_list;
4347 vm_page_t t,m;
4348 int occupied;
4349 boolean_t shadow_internal;
4350
4351 if (upl == UPL_NULL)
4352 return KERN_INVALID_ARGUMENT;
4353
4354 if (upl->flags & UPL_IO_WIRE) {
4355 boolean_t empty;
4356 return upl_commit_range(upl,
4357 0, upl->size, 0,
4358 NULL, 0, &empty);
4359 }
4360
4361 upl_lock(upl);
4362 if(upl->flags & UPL_DEVICE_MEMORY) {
4363 upl_unlock(upl);
4364 return KERN_SUCCESS;
4365 }
4366
4367 object = upl->map_object;
4368
4369 if (object == NULL) {
4370 panic("upl_abort: upl object is not backed by an object");
4371 upl_unlock(upl);
4372 return KERN_INVALID_ARGUMENT;
4373 }
4374
4375 if(object->pageout) {
4376 shadow_object = object->shadow;
4377 shadow_offset = object->shadow_offset;
4378 } else {
4379 shadow_object = object;
4380 shadow_offset = upl->offset - object->paging_offset;
4381 }
4382
4383 if(upl->flags & UPL_INTERNAL) {
4384 lite_list = (wpl_array_t)
4385 ((((uintptr_t)upl) + sizeof(struct upl))
4386 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4387 } else {
4388 lite_list = (wpl_array_t)
4389 (((uintptr_t)upl) + sizeof(struct upl));
4390 }
4391 offset = 0;
4392
4393 if (object != shadow_object)
4394 vm_object_lock(object);
4395 vm_object_lock(shadow_object);
4396
4397 shadow_internal = shadow_object->internal;
4398
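/*
 * Unlike upl_abort_range(), this walks the entire UPL.
 */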
4399 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
4400 m = VM_PAGE_NULL;
4401 target_offset = offset + shadow_offset;
4402 if(upl->flags & UPL_LITE) {
4403 int pg_num;
4404 pg_num = offset/PAGE_SIZE;
4405 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4406 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4407 m = vm_page_lookup(
4408 shadow_object, target_offset);
4409 }
4410 }
4411 if(object->pageout) {
4412 if ((t = vm_page_lookup(object, offset)) != NULL) {
4413 t->pageout = FALSE;
4414 VM_PAGE_FREE(t);
4415 if(m == NULL) {
4416 m = vm_page_lookup(
4417 shadow_object, target_offset);
4418 }
4419 if(m != VM_PAGE_NULL)
4420 vm_object_paging_end(m->object);
4421 }
4422 }
4423 if(m != VM_PAGE_NULL) {
4424 vm_page_lock_queues();
4425 if(m->absent) {
4426 boolean_t must_free = TRUE;
4427
4428 /* COPYOUT = FALSE case */
4429 /* check for error conditions which must */
4430 /* be passed back to the page's customer */
4431 if(error & UPL_ABORT_RESTART) {
4432 m->restart = TRUE;
4433 m->absent = FALSE;
4434 vm_object_absent_release(m->object);
4435 m->page_error = KERN_MEMORY_ERROR;
4436 m->error = TRUE;
4437 must_free = FALSE;
4438 } else if(error & UPL_ABORT_UNAVAILABLE) {
4439 m->restart = FALSE;
4440 m->unusual = TRUE;
4441 must_free = FALSE;
4442 } else if(error & UPL_ABORT_ERROR) {
4443 m->restart = FALSE;
4444 m->absent = FALSE;
4445 vm_object_absent_release(m->object);
4446 m->page_error = KERN_MEMORY_ERROR;
4447 m->error = TRUE;
4448 must_free = FALSE;
4449 }
4450
4451 /*
4452 * ENCRYPTED SWAP:
4453 * If the page was already encrypted,
4454 * we don't really need to decrypt it
4455 * now. It will get decrypted later,
4456 * on demand, as soon as someone needs
4457 * to access its contents.
4458 */
4459
4460 m->cleaning = FALSE;
4461 m->overwriting = FALSE;
4462 PAGE_WAKEUP_DONE(m);
4463
4464 if (must_free == TRUE) {
4465 vm_page_free(m);
4466 } else {
4467 vm_page_activate(m);
4468 }
4469 vm_page_unlock_queues();
4470 continue;
4471 }
4472 /*
4473 * Handle the trusted pager throttle.
4474 */
4475 if (m->laundry) {
4476 vm_pageout_throttle_up(m);
4477 }
4478 if(m->pageout) {
4479 assert(m->busy);
4480 assert(m->wire_count == 1);
4481 m->pageout = FALSE;
4482 vm_page_unwire(m);
4483 }
4484 m->dump_cleaning = FALSE;
4485 m->cleaning = FALSE;
4486 m->overwriting = FALSE;
4487 #if MACH_PAGEMAP
4488 vm_external_state_clr(
4489 m->object->existence_map, m->offset);
4490 #endif /* MACH_PAGEMAP */
4491 if(error & UPL_ABORT_DUMP_PAGES) {
4492 vm_page_free(m);
4493 pmap_disconnect(m->phys_page);
4494 } else {
4495 PAGE_WAKEUP_DONE(m);
4496 }
4497 vm_page_unlock_queues();
4498 }
4499 }
4500 occupied = 1;
4501 if (upl->flags & UPL_DEVICE_MEMORY) {
4502 occupied = 0;
4503 } else if (upl->flags & UPL_LITE) {
4504 int pg_num;
4505 int j;
4506 pg_num = upl->size/PAGE_SIZE;
4507 pg_num = (pg_num + 31) >> 5;
4508 occupied = 0;
4509 for(j= 0; j<pg_num; j++) {
4510 if(lite_list[j] != 0) {
4511 occupied = 1;
4512 break;
4513 }
4514 }
4515 } else {
4516 if(queue_empty(&upl->map_object->memq)) {
4517 occupied = 0;
4518 }
4519 }
4520
4521 if(occupied == 0) {
4522 if(object == shadow_object)
4523 vm_object_paging_end(shadow_object);
4524 }
4525 vm_object_unlock(shadow_object);
4526 if (object != shadow_object)
4527 vm_object_unlock(object);
4528
4529 upl_unlock(upl);
4530 return KERN_SUCCESS;
4531 }
4532
4533 /* an option on commit should be wire */
4534 kern_return_t
4535 upl_commit(
4536 upl_t upl,
4537 upl_page_info_t *page_list,
4538 mach_msg_type_number_t count)
4539 {
4540 if (upl == UPL_NULL)
4541 return KERN_INVALID_ARGUMENT;
4542
4543 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
4544 boolean_t empty;
4545 return upl_commit_range(upl, 0, upl->size, 0,
4546 page_list, count, &empty);
4547 }
4548
4549 if (count == 0)
4550 page_list = NULL;
4551
4552 upl_lock(upl);
4553 if (upl->flags & UPL_DEVICE_MEMORY)
4554 page_list = NULL;
4555
4556 if (upl->flags & UPL_ENCRYPTED) {
4557 /*
4558 * ENCRYPTED SWAP:
4559 * This UPL was encrypted, but we don't need
4560 * to decrypt here. We'll decrypt each page
4561 * later, on demand, as soon as someone needs
4562 * to access the page's contents.
4563 */
4564 }
4565
4566 if ((upl->flags & UPL_CLEAR_DIRTY) ||
4567 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
4568 vm_object_t shadow_object = upl->map_object->shadow;
4569 vm_object_t object = upl->map_object;
4570 vm_object_offset_t target_offset;
4571 upl_size_t xfer_end;
4572 int entry;
4573
4574 vm_page_t t, m;
4575 upl_page_info_t *p;
4576
4577 if (object != shadow_object)
4578 vm_object_lock(object);
4579 vm_object_lock(shadow_object);
4580
4581 entry = 0;
4582 target_offset = object->shadow_offset;
4583 xfer_end = upl->size + object->shadow_offset;
4584
4585 while(target_offset < xfer_end) {
4586
4587 if ((t = vm_page_lookup(object,
4588 target_offset - object->shadow_offset))
4589 == NULL) {
4590 target_offset += PAGE_SIZE_64;
4591 entry++;
4592 continue;
4593 }
4594
4595 m = vm_page_lookup(shadow_object, target_offset);
4596 if(m != VM_PAGE_NULL) {
4597 /*
4598 * ENCRYPTED SWAP:
4599 * If this page was encrypted, we
4600 * don't need to decrypt it here.
4601 * We'll decrypt it later, on demand,
4602 * as soon as someone needs to access
4603 * its contents.
4604 */
4605
4606 if (upl->flags & UPL_CLEAR_DIRTY) {
4607 pmap_clear_modify(m->phys_page);
4608 m->dirty = FALSE;
4609 }
4610 /*
4611  * It is part of the semantics of COPYOUT_FROM UPLs that a
4612  * commit implies a cache sync between the vm page and the
4613  * backing store; this can be used to strip the precious bit
4614  * as well as to clean.
4615  */
4616 if (upl->flags & UPL_PAGE_SYNC_DONE)
4617 m->precious = FALSE;
4618
4619 if(page_list) {
4620 p = &(page_list[entry]);
4621 if(page_list[entry].phys_addr &&
4622 p->pageout && !m->pageout) {
4623 vm_page_lock_queues();
4624 m->busy = TRUE;
4625 m->pageout = TRUE;
4626 vm_page_wire(m);
4627 vm_page_unlock_queues();
4628 } else if (page_list[entry].phys_addr &&
4629 !p->pageout && m->pageout &&
4630 !m->dump_cleaning) {
4631 vm_page_lock_queues();
4632 m->pageout = FALSE;
4633 m->absent = FALSE;
4634 m->overwriting = FALSE;
4635 vm_page_unwire(m);
4636 PAGE_WAKEUP_DONE(m);
4637 vm_page_unlock_queues();
4638 }
4639 page_list[entry].phys_addr = 0;
4640 }
4641 }
4642 target_offset += PAGE_SIZE_64;
4643 entry++;
4644 }
4645 vm_object_unlock(shadow_object);
4646 if (object != shadow_object)
4647 vm_object_unlock(object);
4648
4649 }
4650 if (upl->flags & UPL_DEVICE_MEMORY) {
4651 vm_object_lock(upl->map_object->shadow);
4652 if(upl->map_object == upl->map_object->shadow)
4653 vm_object_paging_end(upl->map_object->shadow);
4654 vm_object_unlock(upl->map_object->shadow);
4655 }
4656 upl_unlock(upl);
4657 return KERN_SUCCESS;
4658 }
4659
4660
4661
4662 kern_return_t
4663 vm_object_iopl_request(
4664 vm_object_t object,
4665 vm_object_offset_t offset,
4666 upl_size_t size,
4667 upl_t *upl_ptr,
4668 upl_page_info_array_t user_page_list,
4669 unsigned int *page_list_count,
4670 int cntrl_flags)
4671 {
4672 vm_page_t dst_page;
4673 vm_object_offset_t dst_offset = offset;
4674 upl_size_t xfer_size = size;
4675 upl_t upl = NULL;
4676 unsigned int entry;
4677 wpl_array_t lite_list = NULL;
4678 int page_field_size;
4679 int delayed_unlock = 0;
4680 int no_zero_fill = FALSE;
4681 vm_page_t alias_page = NULL;
4682 kern_return_t ret;
4683 vm_prot_t prot;
4684
4685
4686 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4687 /*
4688 * For forward compatibility's sake,
4689 * reject any unknown flag.
4690 */
4691 return KERN_INVALID_VALUE;
4692 }
4693
4694 if (cntrl_flags & UPL_ENCRYPT) {
4695 /*
4696 * ENCRYPTED SWAP:
4697 * The paging path doesn't use this interface,
4698 * so we don't support the UPL_ENCRYPT flag
4699 * here. We won't encrypt the pages.
4700 */
4701 assert(! (cntrl_flags & UPL_ENCRYPT));
4702 }
4703
4704 if (cntrl_flags & UPL_NOZEROFILL)
4705 no_zero_fill = TRUE;
4706
4707 if (cntrl_flags & UPL_COPYOUT_FROM)
4708 prot = VM_PROT_READ;
4709 else
4710 prot = VM_PROT_READ | VM_PROT_WRITE;
4711
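/*
 * Clamp the request to MAX_UPL_TRANSFER pages, except for physically
 * contiguous objects, which are treated as device memory and are not
 * described page by page.
 */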
4712 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4713 size = MAX_UPL_TRANSFER * page_size;
4714 }
4715
4716 if(cntrl_flags & UPL_SET_INTERNAL)
4717 if(page_list_count != NULL)
4718 *page_list_count = MAX_UPL_TRANSFER;
4719 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4720 ((page_list_count != NULL) && (*page_list_count != 0)
4721 && *page_list_count < (size/page_size)))
4722 return KERN_INVALID_ARGUMENT;
4723
4724 if((!object->internal) && (object->paging_offset != 0))
4725 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
4726
4727 if(object->phys_contiguous) {
4728 /* No paging operations are possible against this memory */
4729 /* and so no need for map object, ever */
4730 cntrl_flags |= UPL_SET_LITE;
4731 }
4732
4733 if(upl_ptr) {
4734 if(cntrl_flags & UPL_SET_INTERNAL) {
4735 if(cntrl_flags & UPL_SET_LITE) {
4736 upl = upl_create(
4737 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4738 size);
4739 user_page_list = (upl_page_info_t *)
4740 (((uintptr_t)upl) + sizeof(struct upl));
4741 lite_list = (wpl_array_t)
4742 (((uintptr_t)user_page_list) +
4743 ((size/PAGE_SIZE) *
4744 sizeof(upl_page_info_t)));
4745 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4746 page_field_size =
4747 (page_field_size + 3) & 0xFFFFFFFC;
4748 bzero((char *)lite_list, page_field_size);
4749 upl->flags =
4750 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4751 } else {
4752 upl = upl_create(UPL_CREATE_INTERNAL, size);
4753 user_page_list = (upl_page_info_t *)
4754 (((uintptr_t)upl)
4755 + sizeof(struct upl));
4756 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4757 }
4758 } else {
4759 if(cntrl_flags & UPL_SET_LITE) {
4760 upl = upl_create(UPL_CREATE_LITE, size);
4761 lite_list = (wpl_array_t)
4762 (((uintptr_t)upl) + sizeof(struct upl));
4763 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4764 page_field_size =
4765 (page_field_size + 3) & 0xFFFFFFFC;
4766 bzero((char *)lite_list, page_field_size);
4767 upl->flags = UPL_LITE | UPL_IO_WIRE;
4768 } else {
4769 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4770 upl->flags = UPL_IO_WIRE;
4771 }
4772 }
4773
4774 if(object->phys_contiguous) {
4775 upl->map_object = object;
4776 /* don't need any shadow mappings for this one */
4777 /* since it is already I/O memory */
4778 upl->flags |= UPL_DEVICE_MEMORY;
4779
4780 vm_object_lock(object);
4781 vm_object_paging_begin(object);
4782 vm_object_unlock(object);
4783
4784 /* paging in progress also protects the paging_offset */
4785 upl->offset = offset + object->paging_offset;
4786 upl->size = size;
4787 *upl_ptr = upl;
4788 if(user_page_list) {
4789 user_page_list[0].phys_addr =
4790 (offset + object->shadow_offset)>>PAGE_SHIFT;
4791 user_page_list[0].device = TRUE;
4792 }
4793
4794 if(page_list_count != NULL) {
4795 if (upl->flags & UPL_INTERNAL) {
4796 *page_list_count = 0;
4797 } else {
4798 *page_list_count = 1;
4799 }
4800 }
4801 return KERN_SUCCESS;
4802 }
4803 if(user_page_list)
4804 user_page_list[0].device = FALSE;
4805
4806 if(cntrl_flags & UPL_SET_LITE) {
4807 upl->map_object = object;
4808 } else {
4809 upl->map_object = vm_object_allocate(size);
4810 vm_object_lock(upl->map_object);
4811 upl->map_object->shadow = object;
4812 upl->map_object->pageout = TRUE;
4813 upl->map_object->can_persist = FALSE;
4814 upl->map_object->copy_strategy =
4815 MEMORY_OBJECT_COPY_NONE;
4816 upl->map_object->shadow_offset = offset;
4817 upl->map_object->wimg_bits = object->wimg_bits;
4818 vm_object_unlock(upl->map_object);
4819 }
4820 }
4821 vm_object_lock(object);
4822 vm_object_paging_begin(object);
4823
4824 if (!object->phys_contiguous) {
4825 /* Protect user space from future COW operations */
4826 object->true_share = TRUE;
4827 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4828 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4829 }
4830
4831 /* we can lock the upl offset now that paging_in_progress is set */
4832 if(upl_ptr) {
4833 upl->size = size;
4834 upl->offset = offset + object->paging_offset;
4835 *upl_ptr = upl;
4836 #ifdef UPL_DEBUG
4837 queue_enter(&object->uplq, upl, upl_t, uplq);
4838 #endif /* UPL_DEBUG */
4839 }
4840
4841 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4842 /*
4843 * The user requested that access to the pages in this UPL
4844 * be blocked until the UPL is committed or aborted.
4845 */
4846 upl->flags |= UPL_ACCESS_BLOCKED;
4847 }
4848
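/*
 * Walk the requested range: fault in any page that is missing, busy
 * or encrypted, wire it, and record it either in the lite bitmap or
 * through an alias page in the map object.
 */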
4849 entry = 0;
4850 while (xfer_size) {
4851 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4852 if (delayed_unlock) {
4853 delayed_unlock = 0;
4854 vm_page_unlock_queues();
4855 }
4856 vm_object_unlock(object);
4857 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4858 vm_object_lock(object);
4859 }
4860 dst_page = vm_page_lookup(object, dst_offset);
4861
4862 /*
4863 * ENCRYPTED SWAP:
4864 * If the page is encrypted, we need to decrypt it,
4865 * so force a soft page fault.
4866 */
4867 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4868 (dst_page->encrypted) ||
4869 (dst_page->unusual && (dst_page->error ||
4870 dst_page->restart ||
4871 dst_page->absent ||
4872 dst_page->fictitious ||
4873 (prot & dst_page->page_lock)))) {
4874 vm_fault_return_t result;
4875 do {
4876 vm_page_t top_page;
4877 kern_return_t error_code;
4878 int interruptible;
4879
4880 vm_object_offset_t lo_offset = offset;
4881 vm_object_offset_t hi_offset = offset + size;
4882
4883
4884 if (delayed_unlock) {
4885 delayed_unlock = 0;
4886 vm_page_unlock_queues();
4887 }
4888
4889 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4890 interruptible = THREAD_ABORTSAFE;
4891 } else {
4892 interruptible = THREAD_UNINT;
4893 }
4894
4895 result = vm_fault_page(object, dst_offset,
4896 prot | VM_PROT_WRITE, FALSE,
4897 interruptible,
4898 lo_offset, hi_offset,
4899 VM_BEHAVIOR_SEQUENTIAL,
4900 &prot, &dst_page, &top_page,
4901 (int *)0,
4902 &error_code, no_zero_fill, FALSE, NULL, 0);
4903
4904 switch(result) {
4905 case VM_FAULT_SUCCESS:
4906
4907 PAGE_WAKEUP_DONE(dst_page);
4908
4909 /*
4910 * Release paging references and
4911 * top-level placeholder page, if any.
4912 */
4913
4914 if(top_page != VM_PAGE_NULL) {
4915 vm_object_t local_object;
4916 local_object =
4917 top_page->object;
4918 if(top_page->object
4919 != dst_page->object) {
4920 vm_object_lock(
4921 local_object);
4922 VM_PAGE_FREE(top_page);
4923 vm_object_paging_end(
4924 local_object);
4925 vm_object_unlock(
4926 local_object);
4927 } else {
4928 VM_PAGE_FREE(top_page);
4929 vm_object_paging_end(
4930 local_object);
4931 }
4932 }
4933
4934 break;
4935
4936
4937 case VM_FAULT_RETRY:
4938 vm_object_lock(object);
4939 vm_object_paging_begin(object);
4940 break;
4941
4942 case VM_FAULT_FICTITIOUS_SHORTAGE:
4943 vm_page_more_fictitious();
4944 vm_object_lock(object);
4945 vm_object_paging_begin(object);
4946 break;
4947
4948 case VM_FAULT_MEMORY_SHORTAGE:
4949 if (vm_page_wait(interruptible)) {
4950 vm_object_lock(object);
4951 vm_object_paging_begin(object);
4952 break;
4953 }
4954 /* fall thru */
4955
4956 case VM_FAULT_INTERRUPTED:
4957 error_code = MACH_SEND_INTERRUPTED;
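/* fall thru */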
4958 case VM_FAULT_MEMORY_ERROR:
4959 ret = (error_code ? error_code:
4960 KERN_MEMORY_ERROR);
4961 vm_object_lock(object);
4962 for(; offset < dst_offset;
4963 offset += PAGE_SIZE) {
4964 dst_page = vm_page_lookup(
4965 object, offset);
4966 if(dst_page == VM_PAGE_NULL)
4967 panic("vm_object_iopl_request: Wired pages missing. \n");
4968 vm_page_lock_queues();
4969 vm_page_unwire(dst_page);
4970 vm_page_unlock_queues();
4971 VM_STAT(reactivations++);
4972 }
4973 vm_object_unlock(object);
4974 upl_destroy(upl);
4975 return ret;
4976 }
4977 } while ((result != VM_FAULT_SUCCESS)
4978 || (result == VM_FAULT_INTERRUPTED));
4979 }
4980 if (delayed_unlock == 0)
4981 vm_page_lock_queues();
4982 vm_page_wire(dst_page);
4983
4984 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4985 /*
4986 * Mark the page "busy" to block any future page fault
4987 * on this page. We'll also remove the mapping
4988 * of all these pages before leaving this routine.
4989 */
4990 assert(!dst_page->fictitious);
4991 dst_page->busy = TRUE;
4992 }
4993
4994 if (upl_ptr) {
4995 if (cntrl_flags & UPL_SET_LITE) {
4996 int pg_num;
4997 pg_num = (dst_offset-offset)/PAGE_SIZE;
4998 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4999 } else {
5000 /*
5001 * Convert the fictitious page to a
5002 * private shadow of the real page.
5003 */
5004 assert(alias_page->fictitious);
5005 alias_page->fictitious = FALSE;
5006 alias_page->private = TRUE;
5007 alias_page->pageout = TRUE;
5008 alias_page->phys_page = dst_page->phys_page;
5009 vm_page_wire(alias_page);
5010
5011 vm_page_insert(alias_page,
5012 upl->map_object, size - xfer_size);
5013 assert(!alias_page->wanted);
5014 alias_page->busy = FALSE;
5015 alias_page->absent = FALSE;
5016 }
5017
5018 /* expect the page to be used */
5019 dst_page->reference = TRUE;
5020
5021 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5022 dst_page->dirty = TRUE;
5023 alias_page = NULL;
5024
5025 if (user_page_list) {
5026 user_page_list[entry].phys_addr
5027 = dst_page->phys_page;
5028 user_page_list[entry].dirty =
5029 dst_page->dirty;
5030 user_page_list[entry].pageout =
5031 dst_page->pageout;
5032 user_page_list[entry].absent =
5033 dst_page->absent;
5034 user_page_list[entry].precious =
5035 dst_page->precious;
5036 }
5037 }
5038 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
5039 delayed_unlock = 0;
5040 vm_page_unlock_queues();
5041 }
5042 entry++;
5043 dst_offset += PAGE_SIZE_64;
5044 xfer_size -= PAGE_SIZE;
5045 }
5046 if (delayed_unlock)
5047 vm_page_unlock_queues();
5048
5049 if (upl->flags & UPL_INTERNAL) {
5050 if(page_list_count != NULL)
5051 *page_list_count = 0;
5052 } else if (page_list_count != NULL &&
5053 *page_list_count > entry) {
5054 *page_list_count = entry;
5055 }
5056
5057 if (alias_page != NULL) {
5058 vm_page_lock_queues();
5059 vm_page_free(alias_page);
5060 vm_page_unlock_queues();
5061 }
5062
5063 vm_object_unlock(object);
5064
5065 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5066 /*
5067 * We've marked all the pages "busy" so that future
5068 * page faults will block.
5069 * Now remove the mapping for these pages, so that they
5070 * can't be accessed without causing a page fault.
5071 */
5072 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5073 PMAP_NULL, 0, VM_PROT_NONE);
5074 }
5075
5076 return KERN_SUCCESS;
5077 }
5078
5079 kern_return_t
5080 upl_transpose(
5081 upl_t upl1,
5082 upl_t upl2)
5083 {
5084 kern_return_t retval;
5085 boolean_t upls_locked;
5086 vm_object_t object1, object2;
5087
5088 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5089 return KERN_INVALID_ARGUMENT;
5090 }
5091
5092 upls_locked = FALSE;
5093
5094 /*
5095 * Since we need to lock both UPLs at the same time,
5096 * avoid deadlocks by always taking locks in the same order.
5097 */
5098 if (upl1 < upl2) {
5099 upl_lock(upl1);
5100 upl_lock(upl2);
5101 } else {
5102 upl_lock(upl2);
5103 upl_lock(upl1);
5104 }
5105 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5106
5107 object1 = upl1->map_object;
5108 object2 = upl2->map_object;
5109
5110 if (upl1->offset != 0 || upl2->offset != 0 ||
5111 upl1->size != upl2->size) {
5112 /*
5113 * We deal only with full objects, not subsets.
5114 * That's because we exchange the entire backing store info
5115 * for the objects: pager, resident pages, etc... We can't do
5116 * only part of it.
5117 */
5118 retval = KERN_INVALID_VALUE;
5119 goto done;
5120 }
5121
5122 /*
5123 * Transpose the VM objects' backing store.
5124 */
5125 retval = vm_object_transpose(object1, object2,
5126 (vm_object_size_t) upl1->size);
5127
5128 if (retval == KERN_SUCCESS) {
5129 /*
5130 * Make each UPL point to the correct VM object, i.e. the
5131 * object holding the pages that the UPL refers to...
5132 */
5133 upl1->map_object = object2;
5134 upl2->map_object = object1;
5135 }
5136
5137 done:
5138 /*
5139 * Cleanup.
5140 */
5141 if (upls_locked) {
5142 upl_unlock(upl1);
5143 upl_unlock(upl2);
5144 upls_locked = FALSE;
5145 }
5146
5147 return retval;
5148 }
5149
5150 /*
5151 * ENCRYPTED SWAP:
5152 *
5153 * Rationale: the user might have some encrypted data on disk (via
5154 * FileVault or any other mechanism). That data is then decrypted in
5155 * memory, which is safe as long as the machine is secure. But that
5156 * decrypted data in memory could be paged out to disk by the default
5157 * pager. The data would then be stored on disk in clear (not encrypted)
5158 * and it could be accessed by anyone who gets physical access to the
5159 * disk (if the laptop or the disk gets stolen for example). This weakens
5160 * the security offered by FileVault.
5161 *
5162 * Solution: the default pager will optionally request that all the
5163 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5164 * before it sends this UPL to disk via the vnode_pageout() path.
5165 *
5166 * Notes:
5167 *
5168 * To avoid disrupting the VM LRU algorithms, we want to keep the
5169 * clean-in-place mechanisms, which allow us to send some extra pages to
5170 * swap (clustering) without actually removing them from the user's
5171 * address space. We don't want the user to unknowingly access encrypted
5172 * data, so we have to actually remove the encrypted pages from the page
5173 * table. When the user accesses the data, the hardware will fail to
5174 * locate the virtual page in its page table and will trigger a page
5175 * fault. We can then decrypt the page and enter it in the page table
5176 * again. Whenever we allow the user to access the contents of a page,
5177 * we have to make sure it's not encrypted.
5178 *
5179 *
5180 */
5181 /*
5182 * ENCRYPTED SWAP:
5183 * Reserve of virtual addresses in the kernel address space.
5184 * We need to map the physical pages in the kernel, so that we
5185 * can call the encryption/decryption routines with a kernel
5186 * virtual address. We keep this pool of pre-allocated kernel
5187 * virtual addresses so that we don't have to scan the kernel's
5188 * virtual address space each time we need to encrypt or decrypt
5189 * a physical page.
5190 * It would be nice to be able to encrypt and decrypt in physical
5191 * mode but that might not always be more efficient...
5192 */
5193 decl_simple_lock_data(,vm_paging_lock)
5194 #define VM_PAGING_NUM_PAGES 64
5195 vm_map_offset_t vm_paging_base_address = 0;
5196 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5197 int vm_paging_max_index = 0;
5198 unsigned long vm_paging_no_kernel_page = 0;
5199 unsigned long vm_paging_objects_mapped = 0;
5200 unsigned long vm_paging_pages_mapped = 0;
5201 unsigned long vm_paging_objects_mapped_slow = 0;
5202 unsigned long vm_paging_pages_mapped_slow = 0;
5203
5204 /*
5205 * ENCRYPTED SWAP:
5206 * vm_paging_map_object:
5207 * Maps part of a VM object's pages in the kernel
5208 * virtual address space, using the pre-allocated
5209 * kernel virtual addresses, if possible.
5210 * Context:
5211 * The VM object is locked. This lock will get
5212 * dropped and re-acquired though.
5213 */
5214 kern_return_t
5215 vm_paging_map_object(
5216 vm_map_offset_t *address,
5217 vm_page_t page,
5218 vm_object_t object,
5219 vm_object_offset_t offset,
5220 vm_map_size_t *size)
5221 {
5222 kern_return_t kr;
5223 vm_map_offset_t page_map_offset;
5224 vm_map_size_t map_size;
5225 vm_object_offset_t object_offset;
5226 #ifdef __ppc__
5227 int i;
5228 vm_map_entry_t map_entry;
5229 #endif /* __ppc__ */
5230
5231
5232 #ifdef __ppc__
5233 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5234 /*
5235 * Optimization for the PowerPC.
5236 * Use one of the pre-allocated kernel virtual addresses
5237 * and just enter the VM page in the kernel address space
5238 * at that virtual address.
5239 */
5240 vm_object_unlock(object);
5241 simple_lock(&vm_paging_lock);
5242
5243 if (vm_paging_base_address == 0) {
5244 /*
5245 * Initialize our pool of pre-allocated kernel
5246 * virtual addresses.
5247 */
5248 simple_unlock(&vm_paging_lock);
5249 page_map_offset = 0;
5250 kr = vm_map_find_space(kernel_map,
5251 &page_map_offset,
5252 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5253 0,
5254 &map_entry);
5255 if (kr != KERN_SUCCESS) {
5256 panic("vm_paging_map_object: "
5257 "kernel_map full\n");
5258 }
5259 map_entry->object.vm_object = kernel_object;
5260 map_entry->offset =
5261 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5262 vm_object_reference(kernel_object);
5263 vm_map_unlock(kernel_map);
5264
5265 simple_lock(&vm_paging_lock);
5266 if (vm_paging_base_address != 0) {
5267 /* someone raced us and won: undo */
5268 simple_unlock(&vm_paging_lock);
5269 kr = vm_map_remove(kernel_map,
5270 page_map_offset,
5271 page_map_offset +
5272 (VM_PAGING_NUM_PAGES
5273 * PAGE_SIZE),
5274 VM_MAP_NO_FLAGS);
5275 assert(kr == KERN_SUCCESS);
5276 simple_lock(&vm_paging_lock);
5277 } else {
5278 vm_paging_base_address = page_map_offset;
5279 }
5280 }
5281
5282 /*
5283 * Try and find an available kernel virtual address
5284 * from our pre-allocated pool.
5285 */
5286 page_map_offset = 0;
5287 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5288 if (vm_paging_page_inuse[i] == FALSE) {
5289 page_map_offset = vm_paging_base_address +
5290 (i * PAGE_SIZE);
5291 break;
5292 }
5293 }
5294
5295 if (page_map_offset != 0) {
5296 /*
5297 * We found a kernel virtual address;
5298 * map the physical page to that virtual address.
5299 */
5300 if (i > vm_paging_max_index) {
5301 vm_paging_max_index = i;
5302 }
5303 vm_paging_page_inuse[i] = TRUE;
5304 simple_unlock(&vm_paging_lock);
5305 pmap_map_block(kernel_pmap,
5306 page_map_offset,
5307 page->phys_page,
5308 1, /* Size is number of 4k pages */
5309 VM_PROT_DEFAULT,
5310 ((int) page->object->wimg_bits &
5311 VM_WIMG_MASK),
5312 0);
5313 vm_paging_objects_mapped++;
5314 vm_paging_pages_mapped++;
5315 *address = page_map_offset;
5316 vm_object_lock(object);
5317
5318 /* all done and mapped, ready to use ! */
5319 return KERN_SUCCESS;
5320 }
5321
5322 /*
5323 * We ran out of pre-allocated kernel virtual
5324 * addresses. Just map the page in the kernel
5325 * the slow and regular way.
5326 */
5327 vm_paging_no_kernel_page++;
5328 simple_unlock(&vm_paging_lock);
5329 vm_object_lock(object);
5330 }
5331 #endif /* __ppc__ */
5332
5333 object_offset = vm_object_trunc_page(offset);
5334 map_size = vm_map_round_page(*size);
5335
5336 /*
5337 * Try and map the required range of the object
5338 * in the kernel_map
5339 */
5340
5341 /* don't go beyond the object's end... */
5342 if (object_offset >= object->size) {
5343 map_size = 0;
5344 } else if (map_size > object->size - offset) {
5345 map_size = object->size - offset;
5346 }
5347
5348 vm_object_reference_locked(object); /* for the map entry */
5349 vm_object_unlock(object);
5350
5351 kr = vm_map_enter(kernel_map,
5352 address,
5353 map_size,
5354 0,
5355 VM_FLAGS_ANYWHERE,
5356 object,
5357 object_offset,
5358 FALSE,
5359 VM_PROT_DEFAULT,
5360 VM_PROT_ALL,
5361 VM_INHERIT_NONE);
5362 if (kr != KERN_SUCCESS) {
5363 *address = 0;
5364 *size = 0;
5365 vm_object_deallocate(object); /* for the map entry */
5366 return kr;
5367 }
5368
5369 *size = map_size;
5370
5371 /*
5372 * Enter the mapped pages in the page table now.
5373 */
5374 vm_object_lock(object);
5375 for (page_map_offset = 0;
5376 map_size != 0;
5377 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5378 unsigned int cache_attr;
5379
5380 page = vm_page_lookup(object, offset + page_map_offset);
5381 if (page == VM_PAGE_NULL) {
5382 panic("vm_paging_map_object: no page !?");
5383 }
5384 if (page->no_isync == TRUE) {
5385 pmap_sync_page_data_phys(page->phys_page);
5386 }
5387 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5388
5389 PMAP_ENTER(kernel_pmap,
5390 *address + page_map_offset,
5391 page,
5392 VM_PROT_DEFAULT,
5393 cache_attr,
5394 FALSE);
5395 }
5396
5397 vm_paging_objects_mapped_slow++;
5398 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5399
5400 return KERN_SUCCESS;
5401 }
5402
5403 /*
5404 * ENCRYPTED SWAP:
5405 * vm_paging_unmap_object:
5406 * Unmaps part of a VM object's pages from the kernel
5407 * virtual address space.
5408 * Context:
5409 * The VM object is locked. This lock will get
5410 * dropped and re-acquired though.
5411 */
5412 void
5413 vm_paging_unmap_object(
5414 vm_object_t object,
5415 vm_map_offset_t start,
5416 vm_map_offset_t end)
5417 {
5418 kern_return_t kr;
5419 #ifdef __ppc__
5420 int i;
5421 #endif /* __ppc__ */
5422
5423 if ((vm_paging_base_address != 0) &&
5424 ((start < vm_paging_base_address) ||
5425 (end > (vm_paging_base_address
5426 + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) {
5427 /*
5428 * We didn't use our pre-allocated pool of
5429 * kernel virtual address. Deallocate the
5430 * virtual memory.
5431 */
5432 if (object != VM_OBJECT_NULL) {
5433 vm_object_unlock(object);
5434 }
5435 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5436 if (object != VM_OBJECT_NULL) {
5437 vm_object_lock(object);
5438 }
5439 assert(kr == KERN_SUCCESS);
5440 } else {
5441 /*
5442 * We used a kernel virtual address from our
5443 * pre-allocated pool. Put it back in the pool
5444 * for next time.
5445 */
5446 #ifdef __ppc__
5447 assert(end - start == PAGE_SIZE);
5448 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5449
5450 /* undo the pmap mapping */
5451 mapping_remove(kernel_pmap, start);
5452
5453 simple_lock(&vm_paging_lock);
5454 vm_paging_page_inuse[i] = FALSE;
5455 simple_unlock(&vm_paging_lock);
5456 #endif /* __ppc__ */
5457 }
5458 }
5459
5460 /*
5461 * Encryption data.
5462 * "iv" is the "initial vector". Ideally, we want to
5463 * have a different one for each page we encrypt, so that
5464 * crackers can't find encryption patterns too easily.
5465 */
5466 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5467 boolean_t swap_crypt_ctx_initialized = FALSE;
5468 aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5469 aes_ctx swap_crypt_ctx;
5470 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5471
5472 #if DEBUG
5473 boolean_t swap_crypt_ctx_tested = FALSE;
5474 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5475 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5476 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5477 #endif /* DEBUG */
5478
5479 extern u_long random(void);
5480
5481 /*
5482 * Initialize the encryption context: key and key size.
5483 */
5484 void swap_crypt_ctx_initialize(void); /* forward */
5485 void
5486 swap_crypt_ctx_initialize(void)
5487 {
5488 unsigned int i;
5489
5490 /*
5491 * No need for locking to protect swap_crypt_ctx_initialized
5492 * because the first use of encryption will come from the
5493 * pageout thread (we won't pagein before there's been a pageout)
5494 * and there's only one pageout thread.
5495 */
5496 if (swap_crypt_ctx_initialized == FALSE) {
5497 for (i = 0;
5498 i < (sizeof (swap_crypt_key) /
5499 sizeof (swap_crypt_key[0]));
5500 i++) {
5501 swap_crypt_key[i] = random();
5502 }
5503 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5504 SWAP_CRYPT_AES_KEY_SIZE,
5505 &swap_crypt_ctx.encrypt);
5506 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5507 SWAP_CRYPT_AES_KEY_SIZE,
5508 &swap_crypt_ctx.decrypt);
5509 swap_crypt_ctx_initialized = TRUE;
5510 }
5511
5512 #if DEBUG
5513 /*
5514 * Validate the encryption algorithms.
5515 */
5516 if (swap_crypt_ctx_tested == FALSE) {
5517 /* initialize */
5518 for (i = 0; i < 4096; i++) {
5519 swap_crypt_test_page_ref[i] = (char) i;
5520 }
5521 /* encrypt */
5522 aes_encrypt_cbc(swap_crypt_test_page_ref,
5523 swap_crypt_null_iv,
5524 PAGE_SIZE / AES_BLOCK_SIZE,
5525 swap_crypt_test_page_encrypt,
5526 &swap_crypt_ctx.encrypt);
5527 /* decrypt */
5528 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5529 swap_crypt_null_iv,
5530 PAGE_SIZE / AES_BLOCK_SIZE,
5531 swap_crypt_test_page_decrypt,
5532 &swap_crypt_ctx.decrypt);
5533 /* compare result with original */
5534 for (i = 0; i < 4096; i ++) {
5535 if (swap_crypt_test_page_decrypt[i] !=
5536 swap_crypt_test_page_ref[i]) {
5537 panic("encryption test failed");
5538 }
5539 }
5540
5541 /* encrypt again */
5542 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5543 swap_crypt_null_iv,
5544 PAGE_SIZE / AES_BLOCK_SIZE,
5545 swap_crypt_test_page_decrypt,
5546 &swap_crypt_ctx.encrypt);
5547 /* decrypt in place */
5548 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5549 swap_crypt_null_iv,
5550 PAGE_SIZE / AES_BLOCK_SIZE,
5551 swap_crypt_test_page_decrypt,
5552 &swap_crypt_ctx.decrypt);
5553 for (i = 0; i < 4096; i ++) {
5554 if (swap_crypt_test_page_decrypt[i] !=
5555 swap_crypt_test_page_ref[i]) {
5556 panic("in place encryption test failed");
5557 }
5558 }
5559
5560 swap_crypt_ctx_tested = TRUE;
5561 }
5562 #endif /* DEBUG */
5563 }
5564
5565 /*
5566 * ENCRYPTED SWAP:
5567 * vm_page_encrypt:
5568 * Encrypt the given page, for secure paging.
5569 * The page might already be mapped at kernel virtual
5570 * address "kernel_mapping_offset". Otherwise, we need
5571 * to map it.
5572 *
5573 * Context:
5574 * The page's object is locked, but this lock will be released
5575 * and re-acquired.
5576 * The page is busy and not accessible by users (not entered in any pmap).
5577 */
5578 void
5579 vm_page_encrypt(
5580 vm_page_t page,
5581 vm_map_offset_t kernel_mapping_offset)
5582 {
5583 int clear_refmod = 0;
5584 kern_return_t kr;
5585 boolean_t page_was_referenced;
5586 boolean_t page_was_modified;
5587 vm_map_size_t kernel_mapping_size;
5588 vm_offset_t kernel_vaddr;
5589 union {
5590 unsigned char aes_iv[AES_BLOCK_SIZE];
5591 struct {
5592 memory_object_t pager_object;
5593 vm_object_offset_t paging_offset;
5594 } vm;
5595 } encrypt_iv;
5596
5597 if (! vm_pages_encrypted) {
5598 vm_pages_encrypted = TRUE;
5599 }
5600
5601 assert(page->busy);
5602 assert(page->dirty || page->precious);
5603
5604 if (page->encrypted) {
5605 /*
5606 * Already encrypted: no need to do it again.
5607 */
5608 vm_page_encrypt_already_encrypted_counter++;
5609 return;
5610 }
5611 ASSERT_PAGE_DECRYPTED(page);
5612
5613 /*
5614 * Gather the "reference" and "modified" status of the page.
5615 * We'll restore these values after the encryption, so that
5616 * the encryption is transparent to the rest of the system
5617 * and doesn't impact the VM's LRU logic.
5618 */
5619 page_was_referenced =
5620 (page->reference || pmap_is_referenced(page->phys_page));
5621 page_was_modified =
5622 (page->dirty || pmap_is_modified(page->phys_page));
5623
5624 if (kernel_mapping_offset == 0) {
5625 /*
5626 * The page hasn't already been mapped in kernel space
5627 * by the caller. Map it now, so that we can access
5628 * its contents and encrypt them.
5629 */
5630 kernel_mapping_size = PAGE_SIZE;
5631 kr = vm_paging_map_object(&kernel_mapping_offset,
5632 page,
5633 page->object,
5634 page->offset,
5635 &kernel_mapping_size);
5636 if (kr != KERN_SUCCESS) {
5637 panic("vm_page_encrypt: "
5638 "could not map page in kernel: 0x%x\n",
5639 kr);
5640 }
5641 } else {
5642 kernel_mapping_size = 0;
5643 }
5644 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5645
5646 if (swap_crypt_ctx_initialized == FALSE) {
5647 swap_crypt_ctx_initialize();
5648 }
5649 assert(swap_crypt_ctx_initialized);
5650
5651 /*
5652 * Prepare an "initial vector" for the encryption.
5653 * We use the "pager" and the "paging_offset" for that
5654 * page to obfuscate the encrypted data a bit more and
5655 * prevent crackers from finding patterns that they could
5656 * use to break the key.
5657 */
5658 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5659 encrypt_iv.vm.pager_object = page->object->pager;
5660 encrypt_iv.vm.paging_offset =
5661 page->object->paging_offset + page->offset;
5662
5663 vm_object_unlock(page->object);
5664
5665 /* encrypt the "initial vector" */
5666 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5667 swap_crypt_null_iv,
5668 1,
5669 &encrypt_iv.aes_iv[0],
5670 &swap_crypt_ctx.encrypt);
5671
5672 /*
5673 * Encrypt the page.
5674 */
5675 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5676 &encrypt_iv.aes_iv[0],
5677 PAGE_SIZE / AES_BLOCK_SIZE,
5678 (unsigned char *) kernel_vaddr,
5679 &swap_crypt_ctx.encrypt);
5680
5681 vm_page_encrypt_counter++;
5682
5683 vm_object_lock(page->object);
5684
5685 /*
5686 * Unmap the page from the kernel's address space,
5687 * if we had to map it ourselves. Otherwise, let
5688 * the caller undo the mapping if needed.
5689 */
5690 if (kernel_mapping_size != 0) {
5691 vm_paging_unmap_object(page->object,
5692 kernel_mapping_offset,
5693 kernel_mapping_offset + kernel_mapping_size);
5694 }
5695
5696 /*
5697 * Restore the "reference" and "modified" bits.
5698 * This should clean up any impact the encryption had
5699 * on them.
5700 */
5701 if (! page_was_referenced) {
5702 clear_refmod |= VM_MEM_REFERENCED;
5703 page->reference = FALSE;
5704 }
5705 if (! page_was_modified) {
5706 clear_refmod |= VM_MEM_MODIFIED;
5707 page->dirty = FALSE;
5708 }
5709 if (clear_refmod)
5710 pmap_clear_refmod(page->phys_page, clear_refmod);
5711
5712 page->encrypted = TRUE;
5713 }
5714
5715 /*
5716 * ENCRYPTED SWAP:
5717 * vm_page_decrypt:
5718 * Decrypt the given page.
5719 * The page might already be mapped at kernel virtual
5720 * address "kernel_mapping_offset". Otherwise, we need
5721 * to map it.
5722 *
5723 * Context:
5724 * The page's VM object is locked but will be unlocked and relocked.
5725 * The page is busy and not accessible by users (not entered in any pmap).
5726 */
5727 void
5728 vm_page_decrypt(
5729 vm_page_t page,
5730 vm_map_offset_t kernel_mapping_offset)
5731 {
5732 int clear_refmod = 0;
5733 kern_return_t kr;
5734 vm_map_size_t kernel_mapping_size;
5735 vm_offset_t kernel_vaddr;
5736 boolean_t page_was_referenced;
5737 union {
5738 unsigned char aes_iv[AES_BLOCK_SIZE];
5739 struct {
5740 memory_object_t pager_object;
5741 vm_object_offset_t paging_offset;
5742 } vm;
5743 } decrypt_iv;
5744
5745 assert(page->busy);
5746 assert(page->encrypted);
5747
5748 /*
5749 * Gather the "reference" status of the page.
5750 * We'll restore its value after the decryption, so that
5751 * the decryption is transparent to the rest of the system
5752 * and doesn't impact the VM's LRU logic.
5753 */
5754 page_was_referenced =
5755 (page->reference || pmap_is_referenced(page->phys_page));
5756
5757 if (kernel_mapping_offset == 0) {
5758 /*
5759 * The page hasn't already been mapped in kernel space
5760 * by the caller. Map it now, so that we can access
5761 * its contents and decrypt them.
5762 */
5763 kernel_mapping_size = PAGE_SIZE;
5764 kr = vm_paging_map_object(&kernel_mapping_offset,
5765 page,
5766 page->object,
5767 page->offset,
5768 &kernel_mapping_size);
5769 if (kr != KERN_SUCCESS) {
5770 panic("vm_page_decrypt: "
5771 "could not map page in kernel: 0x%x\n");
5772 }
5773 } else {
5774 kernel_mapping_size = 0;
5775 }
5776 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5777
5778 assert(swap_crypt_ctx_initialized);
5779
5780 /*
5781 * Prepare an "initial vector" for the decryption.
5782 * It has to be the same as the "initial vector" we
5783 * used to encrypt that page.
5784 */
5785 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5786 decrypt_iv.vm.pager_object = page->object->pager;
5787 decrypt_iv.vm.paging_offset =
5788 page->object->paging_offset + page->offset;
5789
5790 vm_object_unlock(page->object);
5791
5792 /* encrypt the "initial vector" */
5793 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5794 swap_crypt_null_iv,
5795 1,
5796 &decrypt_iv.aes_iv[0],
5797 &swap_crypt_ctx.encrypt);
5798
5799 /*
5800 * Decrypt the page.
5801 */
5802 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5803 &decrypt_iv.aes_iv[0],
5804 PAGE_SIZE / AES_BLOCK_SIZE,
5805 (unsigned char *) kernel_vaddr,
5806 &swap_crypt_ctx.decrypt);
5807 vm_page_decrypt_counter++;
5808
5809 vm_object_lock(page->object);
5810
5811 /*
5812 * Unmap the page from the kernel's address space,
5813 * if we had to map it ourselves. Otherwise, let
5814 * the caller undo the mapping if needed.
5815 */
5816 if (kernel_mapping_size != 0) {
5817 vm_paging_unmap_object(page->object,
5818 kernel_vaddr,
5819 kernel_vaddr + PAGE_SIZE);
5820 }
5821
5822 /*
5823 * After decryption, the page is actually clean.
5824 * It was encrypted as part of paging, which "cleans"
5825 * the "dirty" pages.
5826 * No one could access it after it was encrypted
5827 * and the decryption doesn't count.
5828 */
5829 page->dirty = FALSE;
5830 clear_refmod = VM_MEM_MODIFIED;
5831
5832 /* restore the "reference" bit */
5833 if (! page_was_referenced) {
5834 page->reference = FALSE;
5835 clear_refmod |= VM_MEM_REFERENCED;
5836 }
5837 pmap_clear_refmod(page->phys_page, clear_refmod);
5838
5839 page->encrypted = FALSE;
5840
5841 /*
5842 * We've just modified the page's contents via the data cache and part
5843 * of the new contents might still be in the cache and not yet in RAM.
5844 * Since the page is now available and might get gathered in a UPL to
5845 * be part of a DMA transfer from a driver that expects the memory to
5846 * be coherent at this point, we have to flush the data cache.
5847 */
5848 pmap_sync_page_data_phys(page->phys_page);
5849 /*
5850 * Since the page is not mapped yet, some code might assume that it
5851 * doesn't need to invalidate the instruction cache when writing to
5852 * that page. That code relies on "no_isync" being set, so that the
5853 * caches get synchronized when the page is first mapped. So we need
5854 * to set "no_isync" here too, despite the fact that we just
5855 * synchronized the caches above...
5856 */
5857 page->no_isync = TRUE;
5858 }
5859
5860 unsigned long upl_encrypt_upls = 0;
5861 unsigned long upl_encrypt_pages = 0;
5862
5863 /*
5864 * ENCRYPTED SWAP:
5865 *
5866 * upl_encrypt:
5867 * Encrypts all the pages in the UPL, within the specified range.
5868 *
5869 */
5870 void
5871 upl_encrypt(
5872 upl_t upl,
5873 upl_offset_t crypt_offset,
5874 upl_size_t crypt_size)
5875 {
5876 upl_size_t upl_size;
5877 upl_offset_t upl_offset;
5878 vm_object_t upl_object;
5879 vm_page_t page;
5880 vm_object_t shadow_object;
5881 vm_object_offset_t shadow_offset;
5882 vm_object_offset_t paging_offset;
5883 vm_object_offset_t base_offset;
5884
5885 upl_encrypt_upls++;
5886 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5887
5888 upl_lock(upl);
5889
5890 upl_object = upl->map_object;
5891 upl_offset = upl->offset;
5892 upl_size = upl->size;
5893
5894 upl_unlock(upl);
5895
5896 vm_object_lock(upl_object);
5897
5898 /*
5899 * Find the VM object that contains the actual pages.
5900 */
5901 if (upl_object->pageout) {
5902 shadow_object = upl_object->shadow;
5903 /*
5904 * The offset in the shadow object is actually also
5905 * accounted for in upl->offset. It possibly shouldn't be
5906 * this way, but for now don't account for it twice.
5907 */
5908 shadow_offset = 0;
5909 assert(upl_object->paging_offset == 0); /* XXX ? */
5910 vm_object_lock(shadow_object);
5911 } else {
5912 shadow_object = upl_object;
5913 shadow_offset = 0;
5914 }
5915
5916 paging_offset = shadow_object->paging_offset;
5917 vm_object_paging_begin(shadow_object);
5918
5919 if (shadow_object != upl_object) {
5920 vm_object_unlock(shadow_object);
5921 }
5922 vm_object_unlock(upl_object);
5923
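/*
 * Translate the UPL-relative crypt_offset into an offset within the
 * shadow object: add the UPL's own offset, then back out the pager's
 * paging_offset.
 */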
5924 base_offset = shadow_offset;
5925 base_offset += upl_offset;
5926 base_offset += crypt_offset;
5927 base_offset -= paging_offset;
5928 /*
5929 * Unmap the pages, so that nobody can continue accessing them while
5930 * they're encrypted. After that point, all accesses to these pages
5931 * will cause a page fault and block while the page is being encrypted
5932 * (busy). After the encryption completes, any access will cause a
5933 * page fault and the page gets decrypted at that time.
5934 */
5935 assert(crypt_offset + crypt_size <= upl_size);
5936 vm_object_pmap_protect(shadow_object,
5937 base_offset,
5938 (vm_object_size_t)crypt_size,
5939 PMAP_NULL,
5940 0,
5941 VM_PROT_NONE);
5942
5943 /* XXX FBDP could the object have changed significantly here ? */
5944 vm_object_lock(shadow_object);
5945
5946 for (upl_offset = 0;
5947 upl_offset < crypt_size;
5948 upl_offset += PAGE_SIZE) {
5949 page = vm_page_lookup(shadow_object,
5950 base_offset + upl_offset);
5951 if (page == VM_PAGE_NULL) {
5952 panic("upl_encrypt: "
5953 "no page for (obj=%p,off=%lld+%d)!\n",
5954 shadow_object,
5955 base_offset,
5956 upl_offset);
5957 }
5958 vm_page_encrypt(page, 0);
5959 }
5960
5961 vm_object_paging_end(shadow_object);
5962 vm_object_unlock(shadow_object);
5963 }
5964
5965 vm_size_t
5966 upl_get_internal_pagelist_offset(void)
5967 {
5968 return sizeof(struct upl);
5969 }
5970
5971 void
5972 upl_set_dirty(
5973 upl_t upl)
5974 {
5975 upl->flags |= UPL_CLEAR_DIRTY;
5976 }
5977
5978 void
5979 upl_clear_dirty(
5980 upl_t upl)
5981 {
5982 upl->flags &= ~UPL_CLEAR_DIRTY;
5983 }
5984
5985
5986 #ifdef MACH_BSD
5987
5988 boolean_t upl_page_present(upl_page_info_t *upl, int index)
5989 {
5990 return(UPL_PAGE_PRESENT(upl, index));
5991 }
5992 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
5993 {
5994 return(UPL_DIRTY_PAGE(upl, index));
5995 }
5996 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
5997 {
5998 return(UPL_VALID_PAGE(upl, index));
5999 }
6000 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
6001 {
6002 return(UPL_PHYS_PAGE(upl, index));
6003 }
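
/*
 * Editorial sketch, not part of the original source: a BSD-side
 * consumer handed the upl_page_info_t list for a UPL covering "count"
 * pages might walk it through the accessors above rather than touching
 * the page-info fields directly.  The helper name is an assumption
 * made only for illustration.
 *
 *	int
 *	example_count_dirty_pages(upl_page_info_t *pl, int count)
 *	{
 *		int i, ndirty = 0;
 *
 *		for (i = 0; i < count; i++) {
 *			if (upl_valid_page(pl, i) && upl_dirty_page(pl, i))
 *				ndirty++;
 *		}
 *		return ndirty;
 *	}
 */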
6004
6005 void
6006 vm_countdirtypages(void)
6007 {
6008 vm_page_t m;
6009 int dpages;
6010 int pgopages;
6011 int precpages;
6012
6013
6014 dpages=0;
6015 pgopages=0;
6016 precpages=0;
6017
6018 vm_page_lock_queues();
6019 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6020 do {
6021 if (m == (vm_page_t) 0) break;
6022
6023 if (m->dirty) dpages++;
6024 if (m->pageout) pgopages++;
6025 if (m->precious) precpages++;
6026
6027 assert(m->object != kernel_object);
6028 m = (vm_page_t) queue_next(&m->pageq);
6029 if (m == (vm_page_t) 0) break;
6030
6031 } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
6032 vm_page_unlock_queues();
6033
6034 vm_page_lock_queues();
6035 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6036 do {
6037 if (m == (vm_page_t) 0) break;
6038
6039 if (m->dirty) dpages++;
6040 if (m->pageout) pgopages++;
6041 if (m->precious) precpages++;
6042
6043 assert(m->object != kernel_object);
6044 m = (vm_page_t) queue_next(&m->pageq);
6045 if (m == (vm_page_t) 0) break;
6046
6047 } while (!queue_end(&vm_page_queue_zf, (queue_entry_t) m));
6048 vm_page_unlock_queues();
6049
6050 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6051
6052 dpages=0;
6053 pgopages=0;
6054 precpages=0;
6055
6056 vm_page_lock_queues();
6057 m = (vm_page_t) queue_first(&vm_page_queue_active);
6058
6059 do {
6060 if (m == (vm_page_t) 0) break;
6061 if (m->dirty) dpages++;
6062 if (m->pageout) pgopages++;
6063 if (m->precious) precpages++;
6064
6065 assert(m->object != kernel_object);
6066 m = (vm_page_t) queue_next(&m->pageq);
6067 if (m == (vm_page_t) 0) break;
6068
6069 } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
6070 vm_page_unlock_queues();
6071
6072 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6073
6074 }
6075 #endif /* MACH_BSD */
6076
6077 #ifdef UPL_DEBUG
6078 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6079 {
6080 upl->ubc_alias1 = alias1;
6081 upl->ubc_alias2 = alias2;
6082 return KERN_SUCCESS;
6083 }
6084 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6085 {
6086 if (al)
6087 *al = upl->ubc_alias1;
6088 if (al2)
6089 *al2 = upl->ubc_alias2;
6090 return KERN_SUCCESS;
6091 }
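
/*
 * Editorial sketch, not part of the original source: under UPL_DEBUG a
 * hypothetical creator of a UPL could tag it with two alias words so
 * that the originating call site can be recognized when the UPL is
 * later inspected; the particular values below are illustrative only.
 *
 *	upl_ubc_alias_set(upl, (unsigned int) __LINE__, 0);
 */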
6092 #endif /* UPL_DEBUG */
6093
6094
6095
6096 #if MACH_KDB
6097 #include <ddb/db_output.h>
6098 #include <ddb/db_print.h>
6099 #include <vm/vm_print.h>
6100
6101 #define printf kdbprintf
6102 void db_pageout(void);
6103
6104 void
6105 db_vm(void)
6106 {
6107
6108 iprintf("VM Statistics:\n");
6109 db_indent += 2;
6110 iprintf("pages:\n");
6111 db_indent += 2;
6112 iprintf("activ %5d inact %5d free %5d",
6113 vm_page_active_count, vm_page_inactive_count,
6114 vm_page_free_count);
6115 printf(" wire %5d gobbl %5d\n",
6116 vm_page_wire_count, vm_page_gobble_count);
6117 db_indent -= 2;
6118 iprintf("target:\n");
6119 db_indent += 2;
6120 iprintf("min %5d inact %5d free %5d",
6121 vm_page_free_min, vm_page_inactive_target,
6122 vm_page_free_target);
6123 printf(" resrv %5d\n", vm_page_free_reserved);
6124 db_indent -= 2;
6125 iprintf("pause:\n");
6126 db_pageout();
6127 db_indent -= 2;
6128 }
6129
6130 #if MACH_COUNTERS
6131 extern int c_laundry_pages_freed;
6132 #endif /* MACH_COUNTERS */
6133
6134 void
6135 db_pageout(void)
6136 {
6137 iprintf("Pageout Statistics:\n");
6138 db_indent += 2;
6139 iprintf("active %5d inactv %5d\n",
6140 vm_pageout_active, vm_pageout_inactive);
6141 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6142 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6143 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6144 iprintf("used %5d clean %5d dirty %5d\n",
6145 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6146 vm_pageout_inactive_dirty);
6147 #if MACH_COUNTERS
6148 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6149 #endif /* MACH_COUNTERS */
6150 #if MACH_CLUSTER_STATS
6151 iprintf("Cluster Statistics:\n");
6152 db_indent += 2;
6153 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6154 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6155 vm_pageout_cluster_collisions);
6156 iprintf("clusters %5d conversions %5d\n",
6157 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6158 db_indent -= 2;
6159 iprintf("Target Statistics:\n");
6160 db_indent += 2;
6161 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6162 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6163 vm_pageout_target_page_freed);
6164 db_indent -= 2;
6165 #endif /* MACH_CLUSTER_STATS */
6166 db_indent -= 2;
6167 }
6168
6169 #endif /* MACH_KDB */