1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92
93 #include <machine/vm_tuning.h>
94 #include <machine/commpage.h>
95
96 #include <vm/pmap.h>
97 #include <vm/vm_compressor_pager.h>
98 #include <vm/vm_fault.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_object.h>
101 #include <vm/vm_page.h>
102 #include <vm/vm_pageout.h>
103 #include <vm/vm_protos.h> /* must be last */
104 #include <vm/memory_object.h>
105 #include <vm/vm_purgeable_internal.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_compressor.h>
108
109 #if CONFIG_PHANTOM_CACHE
110 #include <vm/vm_phantom_cache.h>
111 #endif
112 /*
113 * ENCRYPTED SWAP:
114 */
115 #include <libkern/crypto/aes.h>
116 extern u_int32_t random(void); /* from <libkern/libkern.h> */
117
118 extern int cs_debug;
119
120 #if UPL_DEBUG
121 #include <libkern/OSDebug.h>
122 #endif
123
124 extern void m_drain(void);
125
126 #if VM_PRESSURE_EVENTS
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 extern unsigned int memorystatus_frozen_count;
131 extern unsigned int memorystatus_suspended_count;
132
133 extern vm_pressure_level_t memorystatus_vm_pressure_level;
134 int memorystatus_purge_on_warning = 2;
135 int memorystatus_purge_on_urgent = 5;
136 int memorystatus_purge_on_critical = 8;
137
138 void vm_pressure_response(void);
139 boolean_t vm_pressure_thread_running = FALSE;
140 extern void consider_vm_pressure_events(void);
141
142 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
143 #endif /* VM_PRESSURE_EVENTS */
144
145 boolean_t vm_pressure_changed = FALSE;
146
147 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
148 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
149 #endif
150
151 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
152 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
153 #endif
154
155 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157 #endif
158
159 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
160 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
161 #endif
162
163 #ifndef VM_PAGE_LAUNDRY_MAX
164 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
165 #endif /* VM_PAGE_LAUNDRY_MAX */
166
167 #ifndef VM_PAGEOUT_BURST_WAIT
168 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
169 #endif /* VM_PAGEOUT_BURST_WAIT */
170
171 #ifndef VM_PAGEOUT_EMPTY_WAIT
172 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
173 #endif /* VM_PAGEOUT_EMPTY_WAIT */
174
175 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
176 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
177 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
178
179 #ifndef VM_PAGEOUT_IDLE_WAIT
180 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
181 #endif /* VM_PAGEOUT_IDLE_WAIT */
182
183 #ifndef VM_PAGEOUT_SWAP_WAIT
184 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
185 #endif /* VM_PAGEOUT_SWAP_WAIT */
186
187 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
188 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
189 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
190
191 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
192 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
193 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
194
195 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
196 unsigned int vm_page_speculative_percentage = 5;
197
198 #ifndef VM_PAGE_SPECULATIVE_TARGET
199 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
200 #endif /* VM_PAGE_SPECULATIVE_TARGET */
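/*
 * With the default vm_page_speculative_percentage of 5, this evaluates to
 * (total) / 20, i.e. roughly 5% of the pageable page count.  Because the
 * percentage is applied through the integer division (100 / percentage),
 * values that don't divide 100 evenly are only approximated.
 */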
201
202
203 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
204 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
205 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
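/*
 * Example: with 1,000,000 active pages, the "healthy" floor for the
 * inactive + speculative queues is 5,000 pages (0.5%); vm_pageout_scan()
 * uses this below to decide whether it needs to set a catch-up count.
 */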
206
207
208 /*
209 * To obtain a reasonable LRU approximation, the inactive queue
210 * needs to be large enough to give pages on it a chance to be
211 * referenced a second time. This macro defines the fraction
212 * of active+inactive pages that should be inactive.
213 * The pageout daemon uses it to update vm_page_inactive_target.
214 *
215 * If vm_page_free_count falls below vm_page_free_target and
216 * vm_page_inactive_count is below vm_page_inactive_target,
217 * then the pageout daemon starts running.
218 */
219
220 #ifndef VM_PAGE_INACTIVE_TARGET
221 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
222 #endif /* VM_PAGE_INACTIVE_TARGET */
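/*
 * Example: vm_pageout_scan() applies this to the sum of the active,
 * inactive and speculative queues, so with 400,000 pageable pages the
 * inactive target comes out to 200,000 pages.
 */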
223
224 /*
225 * Once the pageout daemon starts running, it keeps going
226 * until vm_page_free_count meets or exceeds vm_page_free_target.
227 */
228
229 #ifndef VM_PAGE_FREE_TARGET
230 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
231 #endif /* VM_PAGE_FREE_TARGET */
232
233
234 /*
235 * The pageout daemon always starts running once vm_page_free_count
236 * falls below vm_page_free_min.
237 */
238
239 #ifndef VM_PAGE_FREE_MIN
240 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
241 #endif /* VM_PAGE_FREE_MIN */
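/*
 * Example arithmetic: VM_PAGE_FREE_TARGET(80000) = 15 + 80000/80 = 1015
 * pages, and VM_PAGE_FREE_MIN(80000) = 10 + 80000/100 = 810 pages.
 */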
242
243 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
244 #define VM_PAGE_FREE_MIN_LIMIT 3500
245 #define VM_PAGE_FREE_TARGET_LIMIT 4000
246
247 /*
248 * When vm_page_free_count falls below vm_page_free_reserved,
249 * only vm-privileged threads can allocate pages. vm-privilege
250 * allows the pageout daemon and default pager (and any other
251 * associated threads needed for default pageout) to continue
252 * operation by dipping into the reserved pool of pages.
253 */
254
255 #ifndef VM_PAGE_FREE_RESERVED
256 #define VM_PAGE_FREE_RESERVED(n) \
257 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
258 #endif /* VM_PAGE_FREE_RESERVED */
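/*
 * With VM_PAGE_LAUNDRY_MAX at 128, the base reservation works out to
 * 6 * 128 = 768 pages plus the caller-supplied adjustment "n".
 */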
259
260 /*
261 * When we dequeue pages from the inactive list, they are
262 * reactivated (i.e., put back on the active queue) if referenced.
263 * However, it is possible to starve the free list if other
264 * processors are referencing pages faster than we can turn off
265 * the referenced bit. So we limit the number of reactivations
266 * we will make per call of vm_pageout_scan().
267 */
268 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
269 #ifndef VM_PAGE_REACTIVATE_LIMIT
270 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
271 #endif /* VM_PAGE_REACTIVATE_LIMIT */
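/*
 * Note that this is a MAX() against VM_PAGE_REACTIVATE_LIMIT_MAX, so the
 * computed limit is never below 20000; e.g. with 1,000,000 pageable pages
 * it is MAX(50000, 20000) = 50000 reactivations per vm_pageout_scan() call.
 */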
272 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100
273
274
275 extern boolean_t hibernate_cleaning_in_progress;
276
277 /*
278 * Exported variable used to broadcast the activation of the pageout scan.
279 * The Working Set code uses this to throttle its use of pmap removes. In this
280 * way, code which runs within memory in an uncontested context does
281 * not keep encountering soft faults.
282 */
283
284 unsigned int vm_pageout_scan_event_counter = 0;
285
286 /*
287 * Forward declarations for internal routines.
288 */
289 struct cq {
290 struct vm_pageout_queue *q;
291 void *current_chead;
292 char *scratch_buf;
293 };
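/*
 * Per-thread context handed to vm_pageout_iothread_internal(): the pageout
 * queue the thread services, plus compressor working state (current_chead)
 * and a scratch buffer used while compressing pages.
 */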
294
295
296 #if VM_PRESSURE_EVENTS
297 void vm_pressure_thread(void);
298
299 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
300 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
301
302 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
303 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
304 #endif
305 static void vm_pageout_garbage_collect(int);
306 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
307 static void vm_pageout_iothread_external(void);
308 static void vm_pageout_iothread_internal(struct cq *cq);
309 static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);
310
311 extern void vm_pageout_continue(void);
312 extern void vm_pageout_scan(void);
313
314 static thread_t vm_pageout_external_iothread = THREAD_NULL;
315 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
316
317 unsigned int vm_pageout_reserved_internal = 0;
318 unsigned int vm_pageout_reserved_really = 0;
319
320 unsigned int vm_pageout_swap_wait = 0;
321 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
322 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
323 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
324 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
325 unsigned int vm_pageout_deadlock_relief = 0;
326 unsigned int vm_pageout_inactive_relief = 0;
327 unsigned int vm_pageout_burst_active_throttle = 0;
328 unsigned int vm_pageout_burst_inactive_throttle = 0;
329
330 int vm_upl_wait_for_pages = 0;
331
332
333 /*
334 * These variables record the pageout daemon's actions:
335 * how many pages it looks at and what happens to those pages.
336 * No locking needed because only one thread modifies the variables.
337 */
338
339 unsigned int vm_pageout_active = 0; /* debugging */
340 unsigned int vm_pageout_active_busy = 0; /* debugging */
341 unsigned int vm_pageout_inactive = 0; /* debugging */
342 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
343 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
344 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
345 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
346 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
347 unsigned int vm_pageout_inactive_error = 0; /* debugging */
348 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
349 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
350 unsigned int vm_pageout_inactive_used = 0; /* debugging */
351 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
352 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
353 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
354
355 unsigned int vm_pageout_freed_from_cleaned = 0;
356 unsigned int vm_pageout_freed_from_speculative = 0;
357 unsigned int vm_pageout_freed_from_inactive_clean = 0;
358
359 unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
360 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
361
362 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
363 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
364 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
365 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
366 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
367 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
368 unsigned int vm_pageout_cleaned_busy = 0;
369 unsigned int vm_pageout_cleaned_nolock = 0;
370
371 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
372 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
373 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
374 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
375 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
376 unsigned int vm_pageout_purged_objects = 0; /* debugging */
377 unsigned int vm_stat_discard = 0; /* debugging */
378 unsigned int vm_stat_discard_sent = 0; /* debugging */
379 unsigned int vm_stat_discard_failure = 0; /* debugging */
380 unsigned int vm_stat_discard_throttle = 0; /* debugging */
381 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
382 unsigned int vm_pageout_catch_ups = 0; /* debugging */
383 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
384
385 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
386 unsigned int vm_pageout_scan_active_throttled = 0;
387 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
388 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
389 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
390 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
391 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
392 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
393 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
394 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
395 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
396 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
397 unsigned int vm_page_speculative_count_drifts = 0;
398 unsigned int vm_page_speculative_count_drift_max = 0;
399
400
401 /*
402 * Backing store throttle when BS is exhausted
403 */
404 unsigned int vm_backing_store_low = 0;
405
406 unsigned int vm_pageout_out_of_line = 0;
407 unsigned int vm_pageout_in_place = 0;
408
409 unsigned int vm_page_steal_pageout_page = 0;
410
411 /*
412 * ENCRYPTED SWAP:
413 * counters and statistics...
414 */
415 unsigned long vm_page_decrypt_counter = 0;
416 unsigned long vm_page_decrypt_for_upl_counter = 0;
417 unsigned long vm_page_encrypt_counter = 0;
418 unsigned long vm_page_encrypt_abort_counter = 0;
419 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
420 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
421
422 struct vm_pageout_queue vm_pageout_queue_internal;
423 struct vm_pageout_queue vm_pageout_queue_external;
424
425 unsigned int vm_page_speculative_target = 0;
426
427 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
428
429 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
430
431 #if DEVELOPMENT || DEBUG
432 unsigned long vm_cs_validated_resets = 0;
433 #endif
434
435 int vm_debug_events = 0;
436
437 #if CONFIG_MEMORYSTATUS
438 #if !CONFIG_JETSAM
439 extern boolean_t memorystatus_idle_exit_from_VM(void);
440 #endif
441 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
442 extern void memorystatus_on_pageout_scan_end(void);
443 #endif
444
445 boolean_t vm_page_compressions_failing = FALSE;
446
447 /*
448 * Routine: vm_backing_store_disable
449 * Purpose:
450 * Suspend non-privileged threads wishing to extend
451 * backing store when we are low on backing store
452 * (Synchronized by caller)
453 */
454 void
455 vm_backing_store_disable(
456 boolean_t disable)
457 {
458 if(disable) {
459 vm_backing_store_low = 1;
460 } else {
461 if(vm_backing_store_low) {
462 vm_backing_store_low = 0;
463 thread_wakeup((event_t) &vm_backing_store_low);
464 }
465 }
466 }
467
468
469 #if MACH_CLUSTER_STATS
470 unsigned long vm_pageout_cluster_dirtied = 0;
471 unsigned long vm_pageout_cluster_cleaned = 0;
472 unsigned long vm_pageout_cluster_collisions = 0;
473 unsigned long vm_pageout_cluster_clusters = 0;
474 unsigned long vm_pageout_cluster_conversions = 0;
475 unsigned long vm_pageout_target_collisions = 0;
476 unsigned long vm_pageout_target_page_dirtied = 0;
477 unsigned long vm_pageout_target_page_freed = 0;
478 #define CLUSTER_STAT(clause) clause
479 #else /* MACH_CLUSTER_STATS */
480 #define CLUSTER_STAT(clause)
481 #endif /* MACH_CLUSTER_STATS */
482
483 /*
484 * Routine: vm_pageout_object_terminate
485 * Purpose:
486 * Destroy the pageout_object, and perform all of the
487 * required cleanup actions.
488 *
489 * In/Out conditions:
490 * The object must be locked, and will be returned locked.
491 */
492 void
493 vm_pageout_object_terminate(
494 vm_object_t object)
495 {
496 vm_object_t shadow_object;
497
498 /*
499 * Deal with the deallocation (last reference) of a pageout object
500 * (used for cleaning-in-place) by dropping the paging references/
501 * freeing pages in the original object.
502 */
503
504 assert(object->pageout);
505 shadow_object = object->shadow;
506 vm_object_lock(shadow_object);
507
508 while (!queue_empty(&object->memq)) {
509 vm_page_t p, m;
510 vm_object_offset_t offset;
511
512 p = (vm_page_t) queue_first(&object->memq);
513
514 assert(p->private);
515 assert(p->pageout);
516 p->pageout = FALSE;
517 assert(!p->cleaning);
518 assert(!p->laundry);
519
520 offset = p->offset;
521 VM_PAGE_FREE(p);
522 p = VM_PAGE_NULL;
523
524 m = vm_page_lookup(shadow_object,
525 offset + object->vo_shadow_offset);
526
527 if(m == VM_PAGE_NULL)
528 continue;
529
530 assert((m->dirty) || (m->precious) ||
531 (m->busy && m->cleaning));
532
533 /*
534 * Handle the trusted pager throttle.
535 * Also decrement the burst throttle (if external).
536 */
537 vm_page_lock_queues();
538 if (m->pageout_queue)
539 vm_pageout_throttle_up(m);
540
541 /*
542 * Handle the "target" page(s). These pages are to be freed if
543 * successfully cleaned. Target pages are always busy, and are
544 * wired exactly once. The initial target pages are not mapped,
545 * (so cannot be referenced or modified) but converted target
546 * pages may have been modified between the selection as an
547 * adjacent page and conversion to a target.
548 */
549 if (m->pageout) {
550 assert(m->busy);
551 assert(m->wire_count == 1);
552 m->cleaning = FALSE;
553 m->encrypted_cleaning = FALSE;
554 m->pageout = FALSE;
555 #if MACH_CLUSTER_STATS
556 if (m->wanted) vm_pageout_target_collisions++;
557 #endif
558 /*
559 * Revoke all access to the page. Since the object is
560 * locked, and the page is busy, this prevents the page
561 * from being dirtied after the pmap_disconnect() call
562 * returns.
563 *
564 * Since the page is left "dirty" but "not modified", we
565 * can detect whether the page was redirtied during
566 * pageout by checking the modify state.
567 */
568 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
569 SET_PAGE_DIRTY(m, FALSE);
570 } else {
571 m->dirty = FALSE;
572 }
573
574 if (m->dirty) {
575 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
576 vm_page_unwire(m, TRUE); /* reactivates */
577 VM_STAT_INCR(reactivations);
578 PAGE_WAKEUP_DONE(m);
579 } else {
580 CLUSTER_STAT(vm_pageout_target_page_freed++;)
581 vm_page_free(m);/* clears busy, etc. */
582 }
583 vm_page_unlock_queues();
584 continue;
585 }
586 /*
587 * Handle the "adjacent" pages. These pages were cleaned in
588 * place, and should be left alone.
589 * If prep_pin_count is nonzero, then someone is using the
590 * page, so make it active.
591 */
592 if (!m->active && !m->inactive && !m->throttled && !m->private) {
593 if (m->reference)
594 vm_page_activate(m);
595 else
596 vm_page_deactivate(m);
597 }
598 if (m->overwriting) {
599 /*
600 * the (COPY_OUT_FROM == FALSE) request_page_list case
601 */
602 if (m->busy) {
603 /*
604 * We do not re-set m->dirty !
605 * The page was busy so no extraneous activity
606 * could have occurred. COPY_INTO is a read into the
607 * new pages. CLEAN_IN_PLACE does actually write
608 * out the pages but handling outside of this code
609 * will take care of resetting dirty. We clear the
610 * modify however for the Programmed I/O case.
611 */
612 pmap_clear_modify(m->phys_page);
613
614 m->busy = FALSE;
615 m->absent = FALSE;
616 } else {
617 /*
618 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
619 * Occurs when the original page was wired
620 * at the time of the list request
621 */
622 assert(VM_PAGE_WIRED(m));
623 vm_page_unwire(m, TRUE); /* reactivates */
624 }
625 m->overwriting = FALSE;
626 } else {
627 /*
628 * Set the dirty state according to whether or not the page was
629 * modified during the pageout. Note that we purposefully do
630 * NOT call pmap_clear_modify since the page is still mapped.
631 * If the page were to be dirtied between the 2 calls,
632 * this fact would be lost. This code is only necessary to
633 * maintain statistics, since the pmap module is always
634 * consulted if m->dirty is false.
635 */
636 #if MACH_CLUSTER_STATS
637 m->dirty = pmap_is_modified(m->phys_page);
638
639 if (m->dirty) vm_pageout_cluster_dirtied++;
640 else vm_pageout_cluster_cleaned++;
641 if (m->wanted) vm_pageout_cluster_collisions++;
642 #else
643 m->dirty = FALSE;
644 #endif
645 }
646 if (m->encrypted_cleaning == TRUE) {
647 m->encrypted_cleaning = FALSE;
648 m->busy = FALSE;
649 }
650 m->cleaning = FALSE;
651
652 /*
653 * Wakeup any thread waiting for the page to be un-cleaning.
654 */
655 PAGE_WAKEUP(m);
656 vm_page_unlock_queues();
657 }
658 /*
659 * Account for the paging reference taken in vm_paging_object_allocate.
660 */
661 vm_object_activity_end(shadow_object);
662 vm_object_unlock(shadow_object);
663
664 assert(object->ref_count == 0);
665 assert(object->paging_in_progress == 0);
666 assert(object->activity_in_progress == 0);
667 assert(object->resident_page_count == 0);
668 return;
669 }
670
671 /*
672 * Routine: vm_pageclean_setup
673 *
674 * Purpose: set up a page to be cleaned (made non-dirty), but not
675 * necessarily flushed from the VM page cache.
676 * This is accomplished by cleaning in place.
677 *
678 * The page must not be busy, and new_object
679 * must be locked.
680 *
681 */
682 void
683 vm_pageclean_setup(
684 vm_page_t m,
685 vm_page_t new_m,
686 vm_object_t new_object,
687 vm_object_offset_t new_offset)
688 {
689 assert(!m->busy);
690 #if 0
691 assert(!m->cleaning);
692 #endif
693
694 XPR(XPR_VM_PAGEOUT,
695 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
696 m->object, m->offset, m,
697 new_m, new_offset);
698
699 pmap_clear_modify(m->phys_page);
700
701 /*
702 * Mark original page as cleaning in place.
703 */
704 m->cleaning = TRUE;
705 SET_PAGE_DIRTY(m, FALSE);
706 m->precious = FALSE;
707
708 /*
709 * Convert the fictitious page to a private shadow of
710 * the real page.
711 */
712 assert(new_m->fictitious);
713 assert(new_m->phys_page == vm_page_fictitious_addr);
714 new_m->fictitious = FALSE;
715 new_m->private = TRUE;
716 new_m->pageout = TRUE;
717 new_m->phys_page = m->phys_page;
718
719 vm_page_lockspin_queues();
720 vm_page_wire(new_m);
721 vm_page_unlock_queues();
722
723 vm_page_insert(new_m, new_object, new_offset);
724 assert(!new_m->wanted);
725 new_m->busy = FALSE;
726 }
727
728 /*
729 * Routine: vm_pageout_initialize_page
730 * Purpose:
731 * Causes the specified page to be initialized in
732 * the appropriate memory object. This routine is used to push
733 * pages into a copy-object when they are modified in the
734 * permanent object.
735 *
736 * The page is moved to a temporary object and paged out.
737 *
738 * In/out conditions:
739 * The page in question must not be on any pageout queues.
740 * The object to which it belongs must be locked.
741 * The page must be busy, but not hold a paging reference.
742 *
743 * Implementation:
744 * Move this page to a completely new object.
745 */
746 void
747 vm_pageout_initialize_page(
748 vm_page_t m)
749 {
750 vm_object_t object;
751 vm_object_offset_t paging_offset;
752 memory_object_t pager;
753
754 XPR(XPR_VM_PAGEOUT,
755 "vm_pageout_initialize_page, page 0x%X\n",
756 m, 0, 0, 0, 0);
757 assert(m->busy);
758
759 /*
760 * Verify that we really want to clean this page
761 */
762 assert(!m->absent);
763 assert(!m->error);
764 assert(m->dirty);
765
766 /*
767 * Create a paging reference to let us play with the object.
768 */
769 object = m->object;
770 paging_offset = m->offset + object->paging_offset;
771
772 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
773 VM_PAGE_FREE(m);
774 panic("reservation without pageout?"); /* alan */
775 vm_object_unlock(object);
776
777 return;
778 }
779
780 /*
781 * If there's no pager, then we can't clean the page. This should
782 * never happen since this should be a copy object and therefore not
783 * an external object, so the pager should always be there.
784 */
785
786 pager = object->pager;
787
788 if (pager == MEMORY_OBJECT_NULL) {
789 VM_PAGE_FREE(m);
790 panic("missing pager for copy object");
791 return;
792 }
793
794 /*
795 * set the page for future call to vm_fault_list_request
796 */
797 pmap_clear_modify(m->phys_page);
798 SET_PAGE_DIRTY(m, FALSE);
799 m->pageout = TRUE;
800
801 /*
802 * keep the object from collapsing or terminating
803 */
804 vm_object_paging_begin(object);
805 vm_object_unlock(object);
806
807 /*
808 * Write the data to its pager.
809 * Note that the data is passed by naming the new object,
810 * not a virtual address; the pager interface has been
811 * manipulated to use the "internal memory" data type.
812 * [The object reference from its allocation is donated
813 * to the eventual recipient.]
814 */
815 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
816
817 vm_object_lock(object);
818 vm_object_paging_end(object);
819 }
820
821 #if MACH_CLUSTER_STATS
822 #define MAXCLUSTERPAGES 16
823 struct {
824 unsigned long pages_in_cluster;
825 unsigned long pages_at_higher_offsets;
826 unsigned long pages_at_lower_offsets;
827 } cluster_stats[MAXCLUSTERPAGES];
828 #endif /* MACH_CLUSTER_STATS */
829
830
831 /*
832 * vm_pageout_cluster:
833 *
834 * Given a page, queue it to the appropriate I/O thread,
835 * which will page it out and attempt to clean adjacent pages
836 * in the same operation.
837 *
838 * The object and queues must be locked. We will take a
839 * paging reference to prevent deallocation or collapse when we
840 * release the object lock back at the call site. The I/O thread
841 * is responsible for consuming this reference.
842 *
843 * The page must not be on any pageout queue.
844 */
845
846 void
847 vm_pageout_cluster(vm_page_t m, boolean_t pageout)
848 {
849 vm_object_t object = m->object;
850 struct vm_pageout_queue *q;
851
852
853 XPR(XPR_VM_PAGEOUT,
854 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
855 object, m->offset, m, 0, 0);
856
857 VM_PAGE_CHECK(m);
858 #if DEBUG
859 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
860 #endif
861 vm_object_lock_assert_exclusive(object);
862
863 /*
864 * Only a certain kind of page is appreciated here.
865 */
866 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
867 assert(!m->cleaning && !m->pageout && !m->laundry);
868 #ifndef CONFIG_FREEZE
869 assert(!m->inactive && !m->active);
870 assert(!m->throttled);
871 #endif
872
873 /*
874 * protect the object from collapse or termination
875 */
876 vm_object_activity_begin(object);
877
878 m->pageout = pageout;
879
880 if (object->internal == TRUE) {
881 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
882 m->busy = TRUE;
883
884 q = &vm_pageout_queue_internal;
885 } else
886 q = &vm_pageout_queue_external;
887
888 /*
889 * pgo_laundry count is tied to the laundry bit
890 */
891 m->laundry = TRUE;
892 q->pgo_laundry++;
893
894 m->pageout_queue = TRUE;
895 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
896
897 if (q->pgo_idle == TRUE) {
898 q->pgo_idle = FALSE;
899 thread_wakeup((event_t) &q->pgo_pending);
900 }
901 VM_PAGE_CHECK(m);
902 }
903
904
905 unsigned long vm_pageout_throttle_up_count = 0;
906
907 /*
908 * A page is back from laundry or we are stealing it back from
909 * the laundering state. See if there are some pages waiting to
910 * go to laundry and if we can let some of them go now.
911 *
912 * Object and page queues must be locked.
913 */
914 void
915 vm_pageout_throttle_up(
916 vm_page_t m)
917 {
918 struct vm_pageout_queue *q;
919
920 assert(m->object != VM_OBJECT_NULL);
921 assert(m->object != kernel_object);
922
923 #if DEBUG
924 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
925 vm_object_lock_assert_exclusive(m->object);
926 #endif
927
928 vm_pageout_throttle_up_count++;
929
930 if (m->object->internal == TRUE)
931 q = &vm_pageout_queue_internal;
932 else
933 q = &vm_pageout_queue_external;
934
935 if (m->pageout_queue == TRUE) {
936
937 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
938 m->pageout_queue = FALSE;
939
940 m->pageq.next = NULL;
941 m->pageq.prev = NULL;
942
943 vm_object_activity_end(m->object);
944 }
945 if (m->laundry == TRUE) {
946
947 m->laundry = FALSE;
948 q->pgo_laundry--;
949
950 if (q->pgo_throttled == TRUE) {
951 q->pgo_throttled = FALSE;
952 thread_wakeup((event_t) &q->pgo_laundry);
953 }
954 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
955 q->pgo_draining = FALSE;
956 thread_wakeup((event_t) (&q->pgo_laundry+1));
957 }
958 }
959 }
960
961
962 static void
963 vm_pageout_throttle_up_batch(
964 struct vm_pageout_queue *q,
965 int batch_cnt)
966 {
967 #if DEBUG
968 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
969 #endif
970
971 vm_pageout_throttle_up_count += batch_cnt;
972
973 q->pgo_laundry -= batch_cnt;
974
975 if (q->pgo_throttled == TRUE) {
976 q->pgo_throttled = FALSE;
977 thread_wakeup((event_t) &q->pgo_laundry);
978 }
979 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
980 q->pgo_draining = FALSE;
981 thread_wakeup((event_t) (&q->pgo_laundry+1));
982 }
983 }
984
985
986
987 /*
988 * VM memory pressure monitoring.
989 *
990 * vm_pageout_scan() keeps track of the number of pages it considers and
991 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
992 *
993 * compute_memory_pressure() is called every second from compute_averages()
994 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
995 * of reclaimed pages in a new vm_pageout_stat[] bucket.
996 *
997 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
998 * The caller provides the number of seconds ("nsecs") worth of statistics
999 * it wants, up to 30 seconds.
1000 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1001 * also returns the number of pages the system still needs to reclaim at this
1002 * moment in time.
1003 */
1004 #define VM_PAGEOUT_STAT_SIZE 31
1005 struct vm_pageout_stat {
1006 unsigned int considered;
1007 unsigned int reclaimed;
1008 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
1009 unsigned int vm_pageout_stat_now = 0;
1010 unsigned int vm_memory_pressure = 0;
1011
1012 #define VM_PAGEOUT_STAT_BEFORE(i) \
1013 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1014 #define VM_PAGEOUT_STAT_AFTER(i) \
1015 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
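/*
 * The stats form a 31-entry ring buffer, one bucket per second:
 * VM_PAGEOUT_STAT_BEFORE(0) wraps to 30 and VM_PAGEOUT_STAT_AFTER(30)
 * wraps back to 0.  compute_memory_pressure() advances vm_pageout_stat_now
 * once a second, so the ring holds the 30 most recent completed seconds
 * plus the bucket currently being filled.
 */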
1016
1017 #if VM_PAGE_BUCKETS_CHECK
1018 int vm_page_buckets_check_interval = 10; /* in seconds */
1019 #endif /* VM_PAGE_BUCKETS_CHECK */
1020
1021 /*
1022 * Called from compute_averages().
1023 */
1024 void
1025 compute_memory_pressure(
1026 __unused void *arg)
1027 {
1028 unsigned int vm_pageout_next;
1029
1030 #if VM_PAGE_BUCKETS_CHECK
1031 /* check the consistency of VM page buckets at regular interval */
1032 static int counter = 0;
1033 if ((++counter % vm_page_buckets_check_interval) == 0) {
1034 vm_page_buckets_check();
1035 }
1036 #endif /* VM_PAGE_BUCKETS_CHECK */
1037
1038 vm_memory_pressure =
1039 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
1040
1041 commpage_set_memory_pressure( vm_memory_pressure );
1042
1043 /* move "now" forward */
1044 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1045 vm_pageout_stats[vm_pageout_next].considered = 0;
1046 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
1047 vm_pageout_stat_now = vm_pageout_next;
1048 }
1049
1050
1051 /*
1052 * IMPORTANT
1053 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1054 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1055 * it must be safe in the restricted stackshot context. Locks and/or
1056 * blocking are not allowable.
1057 */
1058 unsigned int
1059 mach_vm_ctl_page_free_wanted(void)
1060 {
1061 unsigned int page_free_target, page_free_count, page_free_wanted;
1062
1063 page_free_target = vm_page_free_target;
1064 page_free_count = vm_page_free_count;
1065 if (page_free_target > page_free_count) {
1066 page_free_wanted = page_free_target - page_free_count;
1067 } else {
1068 page_free_wanted = 0;
1069 }
1070
1071 return page_free_wanted;
1072 }
1073
1074
1075 /*
1076 * IMPORTANT:
1077 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1078 * wait_for_pressure FALSE, so that code path must remain safe in the
1079 * restricted stackshot context. No blocking or locks are allowable
1080 * on that code path.
1081 */
1082
1083 kern_return_t
1084 mach_vm_pressure_monitor(
1085 boolean_t wait_for_pressure,
1086 unsigned int nsecs_monitored,
1087 unsigned int *pages_reclaimed_p,
1088 unsigned int *pages_wanted_p)
1089 {
1090 wait_result_t wr;
1091 unsigned int vm_pageout_then, vm_pageout_now;
1092 unsigned int pages_reclaimed;
1093
1094 /*
1095 * We don't take the vm_page_queue_lock here because we don't want
1096 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1097 * thread when it's trying to reclaim memory. We don't need fully
1098 * accurate monitoring anyway...
1099 */
1100
1101 if (wait_for_pressure) {
1102 /* wait until there's memory pressure */
1103 while (vm_page_free_count >= vm_page_free_target) {
1104 wr = assert_wait((event_t) &vm_page_free_wanted,
1105 THREAD_INTERRUPTIBLE);
1106 if (wr == THREAD_WAITING) {
1107 wr = thread_block(THREAD_CONTINUE_NULL);
1108 }
1109 if (wr == THREAD_INTERRUPTED) {
1110 return KERN_ABORTED;
1111 }
1112 if (wr == THREAD_AWAKENED) {
1113 /*
1114 * The memory pressure might have already
1115 * been relieved but let's not block again
1116 * and let's report that there was memory
1117 * pressure at some point.
1118 */
1119 break;
1120 }
1121 }
1122 }
1123
1124 /* provide the number of pages the system wants to reclaim */
1125 if (pages_wanted_p != NULL) {
1126 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1127 }
1128
1129 if (pages_reclaimed_p == NULL) {
1130 return KERN_SUCCESS;
1131 }
1132
1133 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1134 do {
1135 vm_pageout_now = vm_pageout_stat_now;
1136 pages_reclaimed = 0;
1137 for (vm_pageout_then =
1138 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1139 vm_pageout_then != vm_pageout_now &&
1140 nsecs_monitored-- != 0;
1141 vm_pageout_then =
1142 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1143 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1144 }
1145 } while (vm_pageout_now != vm_pageout_stat_now);
1146 *pages_reclaimed_p = pages_reclaimed;
1147
1148 return KERN_SUCCESS;
1149 }
1150
1151
1152
1153 /*
1154 * function in BSD to apply I/O throttle to the pageout thread
1155 */
1156 extern void vm_pageout_io_throttle(void);
1157
1158 /*
1159 * Page States: Used below to maintain the page state
1160 * before it's removed from its queue. This saved state
1161 * helps us do the right accounting in certain cases.
1162 */
1163 #define PAGE_STATE_SPECULATIVE 1
1164 #define PAGE_STATE_ANONYMOUS 2
1165 #define PAGE_STATE_INACTIVE 3
1166 #define PAGE_STATE_INACTIVE_FIRST 4
1167 #define PAGE_STATE_CLEAN 5
1168
1169
1170 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \
1171 MACRO_BEGIN \
1172 /* \
1173 * If a "reusable" page somehow made it back into \
1174 * the active queue, it's been re-used and is not \
1175 * quite re-usable. \
1176 * If the VM object was "all_reusable", consider it \
1177 * as "all re-used" instead of converting it to \
1178 * "partially re-used", which could be expensive. \
1179 */ \
1180 if ((m)->reusable || \
1181 (m)->object->all_reusable) { \
1182 vm_object_reuse_pages((m)->object, \
1183 (m)->offset, \
1184 (m)->offset + PAGE_SIZE_64, \
1185 FALSE); \
1186 } \
1187 MACRO_END
1188
1189
1190 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1191 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
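/*
 * vm_pageout_scan() batches its work under the page-queues lock: once it
 * has processed delayed_unlock_limit pages (the base limit above plus
 * vm_upl_wait_for_pages, capped at the _MAX value) it drops or yields the
 * lock and flushes its local free list before continuing.
 */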
1192
1193 #define FCS_IDLE 0
1194 #define FCS_DELAYED 1
1195 #define FCS_DEADLOCK_DETECTED 2
1196
1197 struct flow_control {
1198 int state;
1199 mach_timespec_t ts;
1200 };
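/*
 * Flow control for the internal (default pager) pageout queue: the scan
 * starts in FCS_IDLE, moves to FCS_DELAYED when that queue is throttled and
 * records a deadline (vm_pageout_deadlock_wait) in "ts"; if the queue is
 * still throttled when the deadline passes, the state becomes
 * FCS_DEADLOCK_DETECTED and a fixed number of pages is force-moved to
 * break the suspected deadlock.
 */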
1201
1202 uint32_t vm_pageout_considered_page = 0;
1203 uint32_t vm_page_filecache_min = 0;
1204
1205 #define VM_PAGE_FILECACHE_MIN 50000
1206 #define ANONS_GRABBED_LIMIT 2
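/*
 * While the file cache holds more than vm_page_filecache_min external
 * pages, vm_pageout_scan() sets anons_grabbed to ANONS_GRABBED_LIMIT so
 * that page selection is steered back toward file-backed pages instead of
 * grabbing more anonymous ones (see the FCS_IDLE case below).
 */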
1207
1208 /*
1209 * vm_pageout_scan does the dirty work for the pageout daemon.
1210 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1211 * held and vm_page_free_wanted == 0.
1212 */
1213 void
1214 vm_pageout_scan(void)
1215 {
1216 unsigned int loop_count = 0;
1217 unsigned int inactive_burst_count = 0;
1218 unsigned int active_burst_count = 0;
1219 unsigned int reactivated_this_call;
1220 unsigned int reactivate_limit;
1221 vm_page_t local_freeq = NULL;
1222 int local_freed = 0;
1223 int delayed_unlock;
1224 int delayed_unlock_limit = 0;
1225 int refmod_state = 0;
1226 int vm_pageout_deadlock_target = 0;
1227 struct vm_pageout_queue *iq;
1228 struct vm_pageout_queue *eq;
1229 struct vm_speculative_age_q *sq;
1230 struct flow_control flow_control = { 0, { 0, 0 } };
1231 boolean_t inactive_throttled = FALSE;
1232 boolean_t try_failed;
1233 mach_timespec_t ts;
1234 unsigned int msecs = 0;
1235 vm_object_t object;
1236 vm_object_t last_object_tried;
1237 uint32_t catch_up_count = 0;
1238 uint32_t inactive_reclaim_run;
1239 boolean_t forced_reclaim;
1240 boolean_t exceeded_burst_throttle;
1241 boolean_t grab_anonymous = FALSE;
1242 boolean_t force_anonymous = FALSE;
1243 int anons_grabbed = 0;
1244 int page_prev_state = 0;
1245 int cache_evict_throttle = 0;
1246 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1247 int force_purge = 0;
1248
1249 #if VM_PRESSURE_EVENTS
1250 vm_pressure_level_t pressure_level;
1251 #endif /* VM_PRESSURE_EVENTS */
1252
1253 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1254 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1255 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1256
1257 flow_control.state = FCS_IDLE;
1258 iq = &vm_pageout_queue_internal;
1259 eq = &vm_pageout_queue_external;
1260 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1261
1262
1263 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1264
1265
1266 vm_page_lock_queues();
1267 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1268
1269 /*
1270 * Calculate the max number of referenced pages on the inactive
1271 * queue that we will reactivate.
1272 */
1273 reactivated_this_call = 0;
1274 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1275 vm_page_inactive_count);
1276 inactive_reclaim_run = 0;
1277
1278 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1279
1280 /*
1281 * We want to gradually dribble pages from the active queue
1282 * to the inactive queue. If we let the inactive queue get
1283 * very small, and then suddenly dump many pages into it,
1284 * those pages won't get a sufficient chance to be referenced
1285 * before we start taking them from the inactive queue.
1286 *
1287 * We must limit the rate at which we send pages to the pagers
1288 * so that we don't tie up too many pages in the I/O queues.
1289 * We implement a throttling mechanism using the laundry count
1290 * to limit the number of pages outstanding to the default
1291 * and external pagers. We can bypass the throttles and look
1292 * for clean pages if the pageout queues don't drain in a timely
1293 * fashion since this may indicate that the pageout paths are
1294 * stalled waiting for memory, which only we can provide.
1295 */
1296
1297
1298 Restart:
1299 assert(delayed_unlock!=0);
1300
1301 /*
1302 * Recalculate vm_page_inactive_target.
1303 */
1304 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1305 vm_page_inactive_count +
1306 vm_page_speculative_count);
1307
1308 vm_page_anonymous_min = vm_page_inactive_target / 20;
1309
1310
1311 /*
1312 * don't want to wake the pageout_scan thread up every time we fall below
1313 * the targets... set a low water mark at 0.25% below the target
1314 */
1315 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1316
1317 if (vm_page_speculative_percentage > 50)
1318 vm_page_speculative_percentage = 50;
1319 else if (vm_page_speculative_percentage <= 0)
1320 vm_page_speculative_percentage = 1;
1321
1322 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1323 vm_page_inactive_count);
1324
1325 object = NULL;
1326 last_object_tried = NULL;
1327 try_failed = FALSE;
1328
1329 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1330 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1331 else
1332 catch_up_count = 0;
1333
1334 for (;;) {
1335 vm_page_t m;
1336
1337 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1338
1339 if (delayed_unlock == 0) {
1340 vm_page_lock_queues();
1341 delayed_unlock = 1;
1342 }
1343 if (vm_upl_wait_for_pages < 0)
1344 vm_upl_wait_for_pages = 0;
1345
1346 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1347
1348 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1349 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1350
1351 /*
1352 * Move pages from active to inactive if we're below the target
1353 */
1354 /* if we are trying to make clean, we need to make sure we actually have inactive - mj */
1355 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
1356 goto done_moving_active_pages;
1357
1358 if (object != NULL) {
1359 vm_object_unlock(object);
1360 object = NULL;
1361 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1362 }
1363 /*
1364 * Don't sweep through active queue more than the throttle
1365 * which should be kept relatively low
1366 */
1367 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
1368
1369 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
1370 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
1371
1372 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
1373 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1374 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1375 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
1376
1377
1378 while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {
1379
1380 vm_pageout_active++;
1381
1382 m = (vm_page_t) queue_first(&vm_page_queue_active);
1383
1384 assert(m->active && !m->inactive);
1385 assert(!m->laundry);
1386 assert(m->object != kernel_object);
1387 assert(m->phys_page != vm_page_guard_addr);
1388
1389 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1390
1391 /*
1392 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1393 *
1394 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1395 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1396 * new reference happens. If no further references happen on the page after that remote TLB flushes
1397 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1398 * by pageout_scan, which is just fine since the last reference would have happened quite far
1399 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1400 * have happened before we moved the page
1401 */
1402 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1403
1404 /*
1405 * The page might be absent or busy,
1406 * but vm_page_deactivate can handle that.
1407 * FALSE indicates that we don't want a H/W clear reference
1408 */
1409 vm_page_deactivate_internal(m, FALSE);
1410
1411 if (delayed_unlock++ > delayed_unlock_limit) {
1412
1413 if (local_freeq) {
1414 vm_page_unlock_queues();
1415
1416 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1417 vm_page_free_count, local_freed, delayed_unlock_limit, 1);
1418
1419 vm_page_free_list(local_freeq, TRUE);
1420
1421 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1422 vm_page_free_count, 0, 0, 1);
1423
1424 local_freeq = NULL;
1425 local_freed = 0;
1426 vm_page_lock_queues();
1427 } else {
1428 lck_mtx_yield(&vm_page_queue_lock);
1429 }
1430
1431 delayed_unlock = 1;
1432
1433 /*
1434 * continue the while loop processing
1435 * the active queue... need to hold
1436 * the page queues lock
1437 */
1438 }
1439 }
1440
1441 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
1442 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
1443 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
1444
1445 /**********************************************************************
1446 * above this point we're playing with the active queue
1447 * below this point we're playing with the throttling mechanisms
1448 * and the inactive queue
1449 **********************************************************************/
1450
1451 done_moving_active_pages:
1452
1453 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1454 if (object != NULL) {
1455 vm_object_unlock(object);
1456 object = NULL;
1457 }
1458 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1459
1460 if (local_freeq) {
1461 vm_page_unlock_queues();
1462
1463 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1464 vm_page_free_count, local_freed, delayed_unlock_limit, 2);
1465
1466 vm_page_free_list(local_freeq, TRUE);
1467
1468 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1469 vm_page_free_count, local_freed, 0, 2);
1470
1471 local_freeq = NULL;
1472 local_freed = 0;
1473 vm_page_lock_queues();
1474 }
1475 /*
1476 * make sure the pageout I/O threads are running
1477 * throttled in case there are still requests
1478 * in the laundry... since we have met our targets
1479 * we don't need the laundry to be cleaned in a timely
1480 * fashion... so let's avoid interfering with foreground
1481 * activity
1482 */
1483 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
1484
1485 /*
1486 * recalculate vm_page_inactive_target
1487 */
1488 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1489 vm_page_inactive_count +
1490 vm_page_speculative_count);
1491 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
1492 !queue_empty(&vm_page_queue_active)) {
1493 /*
1494 * inactive target still not met... keep going
1495 * until we get the queues balanced...
1496 */
1497 continue;
1498 }
1499 lck_mtx_lock(&vm_page_queue_free_lock);
1500
1501 if ((vm_page_free_count >= vm_page_free_target) &&
1502 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1503 /*
1504 * done - we have met our target *and*
1505 * there is no one waiting for a page.
1506 */
1507 return_from_scan:
1508 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1509
1510 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
1511 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
1512 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
1513 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1514 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1515
1516 return;
1517 }
1518 lck_mtx_unlock(&vm_page_queue_free_lock);
1519 }
1520
1521 /*
1522 * Before anything, we check if we have any ripe volatile
1523 * objects around. If so, try to purge the first object.
1524 * If the purge fails, fall through to reclaim a page instead.
1525 * If the purge succeeds, go back to the top and reevaluate
1526 * the new memory situation.
1527 */
1528
1529 assert (available_for_purge>=0);
1530 force_purge = 0; /* no force-purging */
1531
1532 #if VM_PRESSURE_EVENTS
1533 pressure_level = memorystatus_vm_pressure_level;
1534
1535 if (pressure_level > kVMPressureNormal) {
1536
1537 if (pressure_level >= kVMPressureCritical) {
1538 force_purge = memorystatus_purge_on_critical;
1539 } else if (pressure_level >= kVMPressureUrgent) {
1540 force_purge = memorystatus_purge_on_urgent;
1541 } else if (pressure_level >= kVMPressureWarning) {
1542 force_purge = memorystatus_purge_on_warning;
1543 }
1544 }
1545 #endif /* VM_PRESSURE_EVENTS */
1546
1547 if (available_for_purge || force_purge) {
1548
1549 if (object != NULL) {
1550 vm_object_unlock(object);
1551 object = NULL;
1552 }
1553
1554 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
1555
1556 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
1557 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
1558
1559 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
1560 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
1561 continue;
1562 }
1563 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
1564 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
1565 }
1566
1567 if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
1568 /*
1569 * try to pull pages from the aging bins...
1570 * see vm_page.h for an explanation of how
1571 * this mechanism works
1572 */
1573 struct vm_speculative_age_q *aq;
1574 mach_timespec_t ts_fully_aged;
1575 boolean_t can_steal = FALSE;
1576 int num_scanned_queues;
1577
1578 aq = &vm_page_queue_speculative[speculative_steal_index];
1579
1580 num_scanned_queues = 0;
1581 while (queue_empty(&aq->age_q) &&
1582 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
1583
1584 speculative_steal_index++;
1585
1586 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
1587 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
1588
1589 aq = &vm_page_queue_speculative[speculative_steal_index];
1590 }
1591
1592 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
1593 /*
1594 * XXX We've scanned all the speculative
1595 * queues but still haven't found one
1596 * that is not empty, even though
1597 * vm_page_speculative_count is not 0.
1598 *
1599 * report the anomaly...
1600 */
1601 printf("vm_pageout_scan: "
1602 "all speculative queues empty "
1603 "but count=%d. Re-adjusting.\n",
1604 vm_page_speculative_count);
1605 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
1606 vm_page_speculative_count_drift_max = vm_page_speculative_count;
1607 vm_page_speculative_count_drifts++;
1608 #if 6553678
1609 Debugger("vm_pageout_scan: no speculative pages");
1610 #endif
1611 /* readjust... */
1612 vm_page_speculative_count = 0;
1613 /* ... and continue */
1614 continue;
1615 }
1616
1617 if (vm_page_speculative_count > vm_page_speculative_target)
1618 can_steal = TRUE;
1619 else {
1620 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
1621 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
1622 * 1000 * NSEC_PER_USEC;
1623
1624 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
1625
1626 clock_sec_t sec;
1627 clock_nsec_t nsec;
1628 clock_get_system_nanotime(&sec, &nsec);
1629 ts.tv_sec = (unsigned int) sec;
1630 ts.tv_nsec = nsec;
1631
1632 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
1633 can_steal = TRUE;
1634 }
1635 if (can_steal == TRUE)
1636 vm_page_speculate_ageit(aq);
1637 }
1638 if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
1639 int pages_evicted;
1640
1641 if (object != NULL) {
1642 vm_object_unlock(object);
1643 object = NULL;
1644 }
1645 pages_evicted = vm_object_cache_evict(100, 10);
1646
1647 if (pages_evicted) {
1648
1649 vm_pageout_cache_evicted += pages_evicted;
1650
1651 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
1652 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
1653 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
1654
1655 /*
1656 * we just freed up to 100 pages,
1657 * so go back to the top of the main loop
1658 * and re-evaluate the memory situation
1659 */
1660 continue;
1661 } else
1662 cache_evict_throttle = 100;
1663 }
1664 if (cache_evict_throttle)
1665 cache_evict_throttle--;
1666
1667
1668 exceeded_burst_throttle = FALSE;
1669 /*
1670 * Sometimes we have to pause:
1671 * 1) No inactive pages - nothing to do.
1672 * 2) Loop control - no acceptable pages found on the inactive queue
1673 * within the last vm_pageout_burst_inactive_throttle iterations
1674 * 3) Flow control - default pageout queue is full
1675 */
1676 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
1677 vm_pageout_scan_empty_throttle++;
1678 msecs = vm_pageout_empty_wait;
1679 goto vm_pageout_scan_delay;
1680
1681 } else if (inactive_burst_count >=
1682 MIN(vm_pageout_burst_inactive_throttle,
1683 (vm_page_inactive_count +
1684 vm_page_speculative_count))) {
1685 vm_pageout_scan_burst_throttle++;
1686 msecs = vm_pageout_burst_wait;
1687
1688 exceeded_burst_throttle = TRUE;
1689 goto vm_pageout_scan_delay;
1690
1691 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
1692 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
1693 vm_pageout_scan_swap_throttle++;
1694 msecs = vm_pageout_swap_wait;
1695 goto vm_pageout_scan_delay;
1696
1697 } else if (VM_PAGE_Q_THROTTLED(iq) &&
1698 VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
1699 clock_sec_t sec;
1700 clock_nsec_t nsec;
1701
1702 switch (flow_control.state) {
1703
1704 case FCS_IDLE:
1705 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
1706
1707 if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) {
1708 anons_grabbed = ANONS_GRABBED_LIMIT;
1709 goto consider_inactive;
1710 }
1711 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
1712 continue;
1713 }
1714 reset_deadlock_timer:
1715 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
1716 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
1717 clock_get_system_nanotime(&sec, &nsec);
1718 flow_control.ts.tv_sec = (unsigned int) sec;
1719 flow_control.ts.tv_nsec = nsec;
1720 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
1721
1722 flow_control.state = FCS_DELAYED;
1723 msecs = vm_pageout_deadlock_wait;
1724
1725 break;
1726
1727 case FCS_DELAYED:
1728 clock_get_system_nanotime(&sec, &nsec);
1729 ts.tv_sec = (unsigned int) sec;
1730 ts.tv_nsec = nsec;
1731
1732 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
1733 /*
1734 * the pageout thread for the default pager is potentially
1735 * deadlocked since the
1736 * default pager queue has been throttled for more than the
1737 * allowable time... we need to move some clean pages or dirty
1738 * pages belonging to the external pagers if they aren't throttled...
1739 * vm_page_free_wanted represents the number of threads currently
1740 * blocked waiting for pages... we'll move one page for each of
1741 * these plus a fixed amount to break the logjam... once we're done
1742 * moving this number of pages, we'll re-enter the FCS_DELAYED state
1743 * with a new timeout target since we have no way of knowing
1744 * whether we've broken the deadlock except through observation
1745 * of the queue associated with the default pager... we need to
1746 * stop moving pages and allow the system to run to see what
1747 * state it settles into.
1748 */
1749 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
1750 vm_pageout_scan_deadlock_detected++;
1751 flow_control.state = FCS_DEADLOCK_DETECTED;
1752 thread_wakeup((event_t) &vm_pageout_garbage_collect);
1753 goto consider_inactive;
1754 }
1755 /*
1756 * just resniff instead of trying
1757 * to compute a new delay time... we're going to be
1758 * awakened immediately upon a laundry completion,
1759 * so we won't wait any longer than necessary
1760 */
1761 msecs = vm_pageout_idle_wait;
1762 break;
1763
1764 case FCS_DEADLOCK_DETECTED:
1765 if (vm_pageout_deadlock_target)
1766 goto consider_inactive;
1767 goto reset_deadlock_timer;
1768
1769 }
1770 vm_pageout_scan_delay:
1771 if (object != NULL) {
1772 vm_object_unlock(object);
1773 object = NULL;
1774 }
1775 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1776
1777 vm_page_unlock_queues();
1778
1779 if (local_freeq) {
1780
1781 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1782 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
1783
1784 vm_page_free_list(local_freeq, TRUE);
1785
1786 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1787 vm_page_free_count, local_freed, 0, 3);
1788
1789 local_freeq = NULL;
1790 local_freed = 0;
1791 }
1792 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
1793 vm_consider_waking_compactor_swapper();
1794
1795 vm_page_lock_queues();
1796
1797 if (flow_control.state == FCS_DELAYED &&
1798 !VM_PAGE_Q_THROTTLED(iq)) {
1799 flow_control.state = FCS_IDLE;
1800 goto consider_inactive;
1801 }
1802
1803 if (vm_page_free_count >= vm_page_free_target) {
1804 /*
1805 * we're here because
1806 * 1) someone else freed up some pages while we had
1807 * the queues unlocked above
1808 * and we've hit one of the 3 conditions that
1809 * cause us to pause the pageout scan thread
1810 *
1811 * since we already have enough free pages,
1812 * let's avoid stalling and return normally
1813 *
1814 * before we return, make sure the pageout I/O threads
1815 * are running throttled in case there are still requests
1816 * in the laundry... since we have enough free pages
1817 * we don't need the laundry to be cleaned in a timely
1818 * fashion... so let's avoid interfering with foreground
1819 * activity
1820 *
1821 * we don't want to hold vm_page_queue_free_lock when
1822 * calling vm_pageout_adjust_io_throttles (since it
1823 * may cause other locks to be taken), we do the initial
1824 * check outside of the lock. Once we take the lock,
1825 * we recheck the condition since it may have changed.
1826 * if it has, no problem, we will make the threads
1827 * non-throttled before actually blocking
1828 */
1829 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
1830 }
1831 lck_mtx_lock(&vm_page_queue_free_lock);
1832
1833 if (vm_page_free_count >= vm_page_free_target &&
1834 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1835 goto return_from_scan;
1836 }
1837 lck_mtx_unlock(&vm_page_queue_free_lock);
1838
1839 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
1840 /*
1841 * we're most likely about to block due to one of
1842 * the 3 conditions that cause vm_pageout_scan to
1843 * not be able to make forward progress w/r
1844 * to providing new pages to the free queue,
1845 * so unthrottle the I/O threads in case we
1846 * have laundry to be cleaned... it needs
1847 * to be completed ASAP.
1848 *
1849 * even if we don't block, we want the io threads
1850 * running unthrottled since the sum of free +
1851 * clean pages is still under our free target
1852 */
1853 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
1854 }
1855 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
1856 /*
1857 * if we get here we're below our free target and
1858 * we're stalling due to a full laundry queue or
1859 * we don't have any inactive pages other than
1860 * those in the clean queue...
1861 * however, we have pages on the clean queue that
1862 * can be moved to the free queue, so let's not
1863 * stall the pageout scan
1864 */
1865 flow_control.state = FCS_IDLE;
1866 goto consider_inactive;
1867 }
1868 VM_CHECK_MEMORYSTATUS;
1869
1870 if (flow_control.state != FCS_IDLE)
1871 vm_pageout_scan_throttle++;
1872 iq->pgo_throttled = TRUE;
1873
1874 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
1875 vm_consider_waking_compactor_swapper();
1876
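/*
 * block until either a laundry completion wakes us up or
 * 'msecs' elapses, then restart the scan from the top
 */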
1877 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
1878 counter(c_vm_pageout_scan_block++);
1879
1880 vm_page_unlock_queues();
1881
1882 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1883
1884 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
1885 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
1886 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
1887
1888 thread_block(THREAD_CONTINUE_NULL);
1889
1890 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
1891 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
1892 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
1893
1894 vm_page_lock_queues();
1895 delayed_unlock = 1;
1896
1897 iq->pgo_throttled = FALSE;
1898
1899 if (loop_count >= vm_page_inactive_count)
1900 loop_count = 0;
1901 inactive_burst_count = 0;
1902
1903 goto Restart;
1904 /*NOTREACHED*/
1905 }
1906
1907
1908 flow_control.state = FCS_IDLE;
1909 consider_inactive:
1910 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
1911 vm_pageout_inactive_external_forced_reactivate_limit);
1912 loop_count++;
1913 inactive_burst_count++;
1914 vm_pageout_inactive++;
1915
1916
1917 /*
1918 * Choose a victim.
1919 */
1920 while (1) {
1921 m = NULL;
1922
1923 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
1924 assert(vm_page_throttled_count == 0);
1925 assert(queue_empty(&vm_page_queue_throttled));
1926 }
1927 /*
1928 * The most eligible pages are ones we paged in speculatively,
1929 * but which have not yet been touched.
1930 */
1931 if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) {
1932 m = (vm_page_t) queue_first(&sq->age_q);
1933
1934 page_prev_state = PAGE_STATE_SPECULATIVE;
1935
1936 break;
1937 }
1938 /*
1939 * Try a clean-queue inactive page.
1940 */
1941 if (!queue_empty(&vm_page_queue_cleaned)) {
1942 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
1943
1944 page_prev_state = PAGE_STATE_CLEAN;
1945
1946 break;
1947 }
1948
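/*
 * no speculative or clean-queue pages... decide whether this
 * pass should target anonymous pages: prefer them once the
 * anonymous count exceeds its minimum, and force them if the
 * file cache has shrunk below vm_page_filecache_min or
 * 'force_anonymous' was set
 */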
1949 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
1950
1951 if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) {
1952 grab_anonymous = TRUE;
1953 anons_grabbed = 0;
1954 }
1955
1956 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) {
1957
1958 if ( !queue_empty(&vm_page_queue_inactive) ) {
1959 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1960
1961 page_prev_state = PAGE_STATE_INACTIVE;
1962 anons_grabbed = 0;
1963
1964 break;
1965 }
1966 }
1967 if ( !queue_empty(&vm_page_queue_anonymous) ) {
1968 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
1969
1970 page_prev_state = PAGE_STATE_ANONYMOUS;
1971 anons_grabbed++;
1972
1973 break;
1974 }
1975
1976 /*
1977 * if we've gotten here, we have no victim page.
1978 * if making clean, free the local freed list and return.
1979 * if making free, check to see if we've finished balancing the queues
1980 * yet; if we haven't, just continue, else panic
1981 */
1982 vm_page_unlock_queues();
1983
1984 if (object != NULL) {
1985 vm_object_unlock(object);
1986 object = NULL;
1987 }
1988 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1989
1990 if (local_freeq) {
1991 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1992 vm_page_free_count, local_freed, delayed_unlock_limit, 5);
1993
1994 vm_page_free_list(local_freeq, TRUE);
1995
1996 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1997 vm_page_free_count, local_freed, 0, 5);
1998
1999 local_freeq = NULL;
2000 local_freed = 0;
2001 }
2002 vm_page_lock_queues();
2003 delayed_unlock = 1;
2004
2005 force_anonymous = FALSE;
2006
2007 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2008 goto Restart;
2009
2010 if (!queue_empty(&sq->age_q))
2011 goto Restart;
2012
2013 panic("vm_pageout: no victim");
2014
2015 /* NOTREACHED */
2016 }
2017 force_anonymous = FALSE;
2018
2019 /*
2020 * we just found this page on one of our queues...
2021 * it can't also be on the pageout queue, so safe
2022 * to call VM_PAGE_QUEUES_REMOVE
2023 */
2024 assert(!m->pageout_queue);
2025
2026 VM_PAGE_QUEUES_REMOVE(m);
2027
2028 assert(!m->laundry);
2029 assert(!m->private);
2030 assert(!m->fictitious);
2031 assert(m->object != kernel_object);
2032 assert(m->phys_page != vm_page_guard_addr);
2033
2034
2035 if (page_prev_state != PAGE_STATE_SPECULATIVE)
2036 vm_pageout_stats[vm_pageout_stat_now].considered++;
2037
2038 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2039
2040 /*
2041 * check to see if we currently are working
2042 * with the same object... if so, we've
2043 * already got the lock
2044 */
2045 if (m->object != object) {
2046 /*
2047 * the object associated with the candidate page is
2048 * different from the one we were just working
2049 * with... dump the lock if we still own it
2050 */
2051 if (object != NULL) {
2052 vm_object_unlock(object);
2053 object = NULL;
2054 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2055 }
2056 /*
2057 * Try to lock object; since we've already got the
2058 * page queues lock, we can only 'try' for this one.
2059 * if the 'try' fails, we need to do a mutex_pause
2060 * to allow the owner of the object lock a chance to
2061 * run... otherwise, we're likely to trip over this
2062 * object in the same state as we work our way through
2063 * the queue... clumps of pages associated with the same
2064 * object are fairly typical on the inactive and active queues
2065 */
2066 if (!vm_object_lock_try_scan(m->object)) {
2067 vm_page_t m_want = NULL;
2068
2069 vm_pageout_inactive_nolock++;
2070
2071 if (page_prev_state == PAGE_STATE_CLEAN)
2072 vm_pageout_cleaned_nolock++;
2073
2074 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2075 page_prev_state = PAGE_STATE_INACTIVE_FIRST;
2076
2077 pmap_clear_reference(m->phys_page);
2078 m->reference = FALSE;
2079
2080 /*
2081 * m->object must be stable since we hold the page queues lock...
2082 * we can update the scan_collisions field sans the object lock
2083 * since it is a separate field and this is the only spot that does
2084 * a read-modify-write operation and it is never executed concurrently...
2085 * we can asynchronously set this field to 0 when creating a UPL, so it
2086 * is possible for the value to be a bit non-deterministic, but that's ok
2087 * since it's only used as a hint
2088 */
2089 m->object->scan_collisions++;
2090
2091 if ( !queue_empty(&sq->age_q) )
2092 m_want = (vm_page_t) queue_first(&sq->age_q);
2093 else if ( !queue_empty(&vm_page_queue_cleaned))
2094 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned);
2095 else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous))
2096 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
2097 else if ( !queue_empty(&vm_page_queue_anonymous))
2098 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous);
2099
2100 /*
2101 * this is the next object we're going to be interested in...
2102 * try to make sure it's available after the mutex_yield
2103 * returns control
2104 */
2105 if (m_want)
2106 vm_pageout_scan_wants_object = m_want->object;
2107
2108 /*
2109 * force us to dump any collected free pages
2110 * and to pause before moving on
2111 */
2112 try_failed = TRUE;
2113
2114 goto requeue_page;
2115 }
2116 object = m->object;
2117 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2118
2119 try_failed = FALSE;
2120 }
2121 if (catch_up_count)
2122 catch_up_count--;
2123
2124 if (m->busy) {
2125 if (m->encrypted_cleaning) {
2126 /*
2127 * ENCRYPTED SWAP:
2128 * if this page has already been picked up as
2129 * part of a page-out cluster, it will be busy
2130 * because it is being encrypted (see
2131 * vm_object_upl_request()). But we still
2132 * want to demote it from "clean-in-place"
2133 * (aka "adjacent") to "clean-and-free" (aka
2134 * "target"), so let's ignore its "busy" bit
2135 * here and proceed to check for "cleaning" a
2136 * little bit below...
2137 *
2138 * CAUTION CAUTION:
2139 * A "busy" page should still be left alone for
2140 * most purposes, so we have to be very careful
2141 * not to process that page too much.
2142 */
2143 assert(m->cleaning);
2144 goto consider_inactive_page;
2145 }
2146
2147 /*
2148 * Somebody is already playing with this page.
2149 * Put it back on the appropriate queue
2150 *
2151 */
2152 vm_pageout_inactive_busy++;
2153
2154 if (page_prev_state == PAGE_STATE_CLEAN)
2155 vm_pageout_cleaned_busy++;
2156
2157 requeue_page:
2158 switch (page_prev_state) {
2159
2160 case PAGE_STATE_SPECULATIVE:
2161 vm_page_speculate(m, FALSE);
2162 break;
2163
2164 case PAGE_STATE_ANONYMOUS:
2165 case PAGE_STATE_CLEAN:
2166 case PAGE_STATE_INACTIVE:
2167 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2168 break;
2169
2170 case PAGE_STATE_INACTIVE_FIRST:
2171 VM_PAGE_ENQUEUE_INACTIVE(m, TRUE);
2172 break;
2173 }
2174 goto done_with_inactivepage;
2175 }
2176
2177
2178 /*
2179 * If it's absent, in error or the object is no longer alive,
2180 * we can reclaim the page... in the no longer alive case,
2181 * there are 2 states the page can be in that preclude us
2182 * from reclaiming it - busy or cleaning - that we've already
2183 * dealt with
2184 */
2185 if (m->absent || m->error || !object->alive) {
2186
2187 if (m->absent)
2188 vm_pageout_inactive_absent++;
2189 else if (!object->alive)
2190 vm_pageout_inactive_notalive++;
2191 else
2192 vm_pageout_inactive_error++;
2193 reclaim_page:
2194 if (vm_pageout_deadlock_target) {
2195 vm_pageout_scan_inactive_throttle_success++;
2196 vm_pageout_deadlock_target--;
2197 }
2198
2199 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2200
2201 if (object->internal) {
2202 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2203 } else {
2204 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2205 }
2206 assert(!m->cleaning);
2207 assert(!m->laundry);
2208
2209 m->busy = TRUE;
2210
2211 /*
2212 * remove page from object here since we're already
2213 * behind the object lock... defer the rest of the work
2214 * we'd normally do in vm_page_free_prepare_object
2215 * until 'vm_page_free_list' is called
2216 */
2217 if (m->tabled)
2218 vm_page_remove(m, TRUE);
2219
2220 assert(m->pageq.next == NULL &&
2221 m->pageq.prev == NULL);
2222 m->pageq.next = (queue_entry_t)local_freeq;
2223 local_freeq = m;
2224 local_freed++;
2225
2226 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2227 vm_pageout_freed_from_speculative++;
2228 else if (page_prev_state == PAGE_STATE_CLEAN)
2229 vm_pageout_freed_from_cleaned++;
2230 else
2231 vm_pageout_freed_from_inactive_clean++;
2232
2233 if (page_prev_state != PAGE_STATE_SPECULATIVE)
2234 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
2235
2236 inactive_burst_count = 0;
2237 goto done_with_inactivepage;
2238 }
2239 /*
2240 * If the object is empty, the page must be reclaimed even
2241 * if dirty or used.
2242 * If the page belongs to a volatile object, we stick it back
2243 * on.
2244 */
2245 if (object->copy == VM_OBJECT_NULL) {
2246 if (object->purgable == VM_PURGABLE_EMPTY) {
2247 if (m->pmapped == TRUE) {
2248 /* unmap the page */
2249 refmod_state = pmap_disconnect(m->phys_page);
2250 if (refmod_state & VM_MEM_MODIFIED) {
2251 SET_PAGE_DIRTY(m, FALSE);
2252 }
2253 }
2254 if (m->dirty || m->precious) {
2255 /* we saved the cost of cleaning this page ! */
2256 vm_page_purged_count++;
2257 }
2258 goto reclaim_page;
2259 }
2260
2261 if (COMPRESSED_PAGER_IS_ACTIVE) {
2262 /*
2263 * With the VM compressor, the cost of
2264 * reclaiming a page is much lower (no I/O),
2265 * so if we find a "volatile" page, it's better
2266 * to let it get compressed rather than letting
2267 * it occupy a full page until it gets purged.
2268 * So no need to check for "volatile" here.
2269 */
2270 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2271 /*
2272 * Avoid cleaning a "volatile" page which might
2273 * be purged soon.
2274 */
2275
2276 /* if it's wired, we can't put it on our queue */
2277 assert(!VM_PAGE_WIRED(m));
2278
2279 /* just stick it back on! */
2280 reactivated_this_call++;
2281
2282 if (page_prev_state == PAGE_STATE_CLEAN)
2283 vm_pageout_cleaned_volatile_reactivated++;
2284
2285 goto reactivate_page;
2286 }
2287 }
2288
2289 consider_inactive_page:
2290 if (m->busy) {
2291 /*
2292 * CAUTION CAUTION:
2293 * A "busy" page should always be left alone, except...
2294 */
2295 if (m->cleaning && m->encrypted_cleaning) {
2296 /*
2297 * ENCRYPTED_SWAP:
2298 * We could get here with a "busy" page
2299 * if it's being encrypted during a
2300 * "clean-in-place" operation. We'll deal
2301 * with it right away by testing if it has been
2302 * referenced and either reactivating it or
2303 * promoting it from "clean-in-place" to
2304 * "clean-and-free".
2305 */
2306 } else {
2307 panic("\"busy\" page considered for pageout\n");
2308 }
2309 }
2310
2311 /*
2312 * If it's being used, reactivate.
2313 * (Fictitious pages are either busy or absent.)
2314 * First, update the reference and dirty bits
2315 * to make sure the page is unreferenced.
2316 */
2317 refmod_state = -1;
2318
2319 if (m->reference == FALSE && m->pmapped == TRUE) {
2320 refmod_state = pmap_get_refmod(m->phys_page);
2321
2322 if (refmod_state & VM_MEM_REFERENCED)
2323 m->reference = TRUE;
2324 if (refmod_state & VM_MEM_MODIFIED) {
2325 SET_PAGE_DIRTY(m, FALSE);
2326 }
2327 }
2328
2329 /*
2330 * if (m->cleaning && !m->pageout)
2331 * If already cleaning this page in place and it hasn't
2332 * been recently referenced, just pull off the queue.
2333 * We can leave the page mapped, and upl_commit_range
2334 * will put it on the clean queue.
2335 *
2336 * note: if m->encrypted_cleaning == TRUE, then
2337 * m->cleaning == TRUE
2338 * and we'll handle it here
2339 *
2340 * if (m->pageout && !m->cleaning)
2341 * an msync INVALIDATE is in progress...
2342 * this page has been marked for destruction
2343 * after it has been cleaned,
2344 * but not yet gathered into a UPL
2345 * where 'cleaning' will be set...
2346 * just leave it off the paging queues
2347 *
2348 * if (m->pageout && m->cleaning)
2349 * an msync INVALIDATE is in progress
2350 * and the UPL has already gathered this page...
2351 * just leave it off the paging queues
2352 */
2353
2354 /*
2355 * page with m->pageout and still on the queues means that an
2356 * MS_INVALIDATE is in progress on this page... leave it alone
2357 */
2358 if (m->pageout) {
2359 goto done_with_inactivepage;
2360 }
2361
2362 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
2363 if (m->cleaning) {
2364 if (m->reference == TRUE) {
2365 reactivated_this_call++;
2366 goto reactivate_page;
2367 } else {
2368 goto done_with_inactivepage;
2369 }
2370 }
2371
2372 if (m->reference || m->dirty) {
2373 /* deal with a rogue "reusable" page */
2374 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
2375 }
2376
2377 if (!m->no_cache &&
2378 (m->reference ||
2379 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
2380 /*
2381 * The page we pulled off the inactive list has
2382 * been referenced. It is possible for other
2383 * processors to be touching pages faster than we
2384 * can clear the referenced bit and traverse the
2385 * inactive queue, so we limit the number of
2386 * reactivations.
2387 */
2388 if (++reactivated_this_call >= reactivate_limit) {
2389 vm_pageout_reactivation_limit_exceeded++;
2390 } else if (catch_up_count) {
2391 vm_pageout_catch_ups++;
2392 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2393 vm_pageout_inactive_force_reclaim++;
2394 } else {
2395 uint32_t isinuse;
2396
2397 if (page_prev_state == PAGE_STATE_CLEAN)
2398 vm_pageout_cleaned_reference_reactivated++;
2399
2400 reactivate_page:
2401 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2402 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2403 /*
2404 * no explicit mappings of this object exist
2405 * and it's not open via the filesystem
2406 */
2407 vm_page_deactivate(m);
2408 vm_pageout_inactive_deactivated++;
2409 } else {
2410 /*
2411 * The page was/is being used, so put back on active list.
2412 */
2413 vm_page_activate(m);
2414 VM_STAT_INCR(reactivations);
2415 inactive_burst_count = 0;
2416 }
2417
2418 if (page_prev_state == PAGE_STATE_CLEAN)
2419 vm_pageout_cleaned_reactivated++;
2420
2421 vm_pageout_inactive_used++;
2422
2423 goto done_with_inactivepage;
2424 }
2425 /*
2426 * Make sure we call pmap_get_refmod() if it
2427 * wasn't already called just above, to update
2428 * the dirty bit.
2429 */
2430 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
2431 refmod_state = pmap_get_refmod(m->phys_page);
2432 if (refmod_state & VM_MEM_MODIFIED) {
2433 SET_PAGE_DIRTY(m, FALSE);
2434 }
2435 }
2436 forced_reclaim = TRUE;
2437 } else {
2438 forced_reclaim = FALSE;
2439 }
2440
2441 XPR(XPR_VM_PAGEOUT,
2442 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2443 object, m->offset, m, 0,0);
2444
2445 /*
2446 * we've got a candidate page to steal...
2447 *
2448 * m->dirty is up to date courtesy of the
2449 * preceding check for m->reference... if
2450 * we get here, then m->reference had to be
2451 * FALSE (or possibly "reactivate_limit" was
2452 * exceeded), but in either case we called
2453 * pmap_get_refmod() and updated both
2454 * m->reference and m->dirty
2455 *
2456 * if it's dirty or precious we need to
2457 * see if the target queue is throttled...
2458 * if it is, we need to skip over it by moving it back
2459 * to the end of the inactive queue
2460 */
2461
2462 inactive_throttled = FALSE;
2463
2464 if (m->dirty || m->precious) {
2465 if (object->internal) {
2466 if (VM_PAGE_Q_THROTTLED(iq))
2467 inactive_throttled = TRUE;
2468 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2469 inactive_throttled = TRUE;
2470 }
2471 }
2472 throttle_inactive:
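/*
 * when dynamic paging isn't enabled, a dirty page belonging to an
 * internal object (that isn't purgeable-empty) can't be cleaned...
 * park it on the throttled queue instead of trying to page it out
 */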
2473 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2474 object->internal && m->dirty &&
2475 (object->purgable == VM_PURGABLE_DENY ||
2476 object->purgable == VM_PURGABLE_NONVOLATILE ||
2477 object->purgable == VM_PURGABLE_VOLATILE)) {
2478 queue_enter(&vm_page_queue_throttled, m,
2479 vm_page_t, pageq);
2480 m->throttled = TRUE;
2481 vm_page_throttled_count++;
2482
2483 vm_pageout_scan_reclaimed_throttled++;
2484
2485 inactive_burst_count = 0;
2486 goto done_with_inactivepage;
2487 }
2488 if (inactive_throttled == TRUE) {
2489
2490 if (object->internal == FALSE) {
2491 /*
2492 * we need to break up the following potential deadlock case...
2493 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
2494 * b) The thread doing the writing is waiting for pages while holding the truncate lock
2495 * c) Most of the pages in the inactive queue belong to this file.
2496 *
2497 * we are potentially in this deadlock because...
2498 * a) the external pageout queue is throttled
2499 * b) we're done with the active queue and moved on to the inactive queue
2500 * c) we've got a dirty external page
2501 *
2502 * since we don't know the reason for the external pageout queue being throttled we
2503 * must suspect that we are deadlocked, so move the current page onto the active queue
2504 * in an effort to cause a page from the active queue to 'age' to the inactive queue
2505 *
2506 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
2507 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
2508 * pool the next time we select a victim page... if we can make enough new free pages,
2509 * the deadlock will break, the external pageout queue will empty and it will no longer
2510 * be throttled
2511 *
2512 * if we have jetsam configured, keep a count of the pages reactivated this way so
2513 * that we can try to find clean pages in the active/inactive queues before
2514 * deciding to jetsam a process
2515 */
2516 vm_pageout_scan_inactive_throttled_external++;
2517
2518 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2519 m->active = TRUE;
2520 vm_page_active_count++;
2521 vm_page_pageable_external_count++;
2522
2523 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2524
2525 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2526 vm_pageout_inactive_external_forced_reactivate_limit--;
2527
2528 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
2529 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
2530 /*
2531 * Possible deadlock scenario so request jetsam action
2532 */
2533 assert(object);
2534 vm_object_unlock(object);
2535 object = VM_OBJECT_NULL;
2536 vm_page_unlock_queues();
2537
2538 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
2539 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
2540
2541 /* Kill first suitable process */
2542 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
2543 panic("vm_pageout_scan: Jetsam request failed\n");
2544 }
2545
2546 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
2547
2548 vm_pageout_inactive_external_forced_jetsam_count++;
2549 vm_page_lock_queues();
2550 delayed_unlock = 1;
2551 }
2552 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
2553 force_anonymous = TRUE;
2554 #endif
2555 inactive_burst_count = 0;
2556 goto done_with_inactivepage;
2557 } else {
2558 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2559 page_prev_state = PAGE_STATE_INACTIVE;
2560
2561 vm_pageout_scan_inactive_throttled_internal++;
2562
2563 goto requeue_page;
2564 }
2565 }
2566
2567 /*
2568 * we've got a page that we can steal...
2569 * eliminate all mappings and make sure
2570 * we have the up-to-date modified state
2571 *
2572 * if we need to do a pmap_disconnect then we
2573 * need to re-evaluate m->dirty since the pmap_disconnect
2574 * provides the true state atomically... the
2575 * page was still mapped up to the pmap_disconnect
2576 * and may have been dirtied at the last microsecond
2577 *
2578 * Note that if 'pmapped' is FALSE then the page is not
2579 * and has not been in any map, so there is no point calling
2580 * pmap_disconnect(). m->dirty could have been set in anticipation
2581 * of likely usage of the page.
2582 */
2583 if (m->pmapped == TRUE) {
2584
2585 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE || object->internal == FALSE) {
2586 /*
2587 * Don't count this page as going into the compressor if any of these are true:
2588 * 1) We have the dynamic pager i.e. no compressed pager
2589 * 2) Freezer enabled device with a freezer file to hold the app data i.e. no compressed pager
2590 * 3) Freezer enabled device with compressed pager backend (exclusive use) i.e. most of the VM system
2591 * (including vm_pageout_scan) has no knowledge of the compressor
2592 * 4) This page belongs to a file and hence will not be sent into the compressor
2593 */
2594
2595 refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL);
2596 } else {
2597 refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
2598 }
2599
2600 if (refmod_state & VM_MEM_MODIFIED) {
2601 SET_PAGE_DIRTY(m, FALSE);
2602 }
2603 }
2604 /*
2605 * reset our count of pages that have been reclaimed
2606 * since the last page was 'stolen'
2607 */
2608 inactive_reclaim_run = 0;
2609
2610 /*
2611 * If it's clean and not precious, we can free the page.
2612 */
2613 if (!m->dirty && !m->precious) {
2614
2615 if (page_prev_state == PAGE_STATE_SPECULATIVE)
2616 vm_pageout_speculative_clean++;
2617 else {
2618 if (page_prev_state == PAGE_STATE_ANONYMOUS)
2619 vm_pageout_inactive_anonymous++;
2620 else if (page_prev_state == PAGE_STATE_CLEAN)
2621 vm_pageout_cleaned_reclaimed++;
2622
2623 vm_pageout_inactive_clean++;
2624 }
2625
2626 /*
2627 * OK, at this point we have found a page we are going to free.
2628 */
2629 #if CONFIG_PHANTOM_CACHE
2630 if (!object->internal)
2631 vm_phantom_cache_add_ghost(m);
2632 #endif
2633 goto reclaim_page;
2634 }
2635
2636 /*
2637 * The page may have been dirtied since the last check
2638 * for a throttled target queue (which may have been skipped
2639 * if the page was clean then). With the dirty page
2640 * disconnected here, we can make one final check.
2641 */
2642 if (object->internal) {
2643 if (VM_PAGE_Q_THROTTLED(iq))
2644 inactive_throttled = TRUE;
2645 } else if (VM_PAGE_Q_THROTTLED(eq)) {
2646 inactive_throttled = TRUE;
2647 }
2648
2649 if (inactive_throttled == TRUE)
2650 goto throttle_inactive;
2651
2652 #if VM_PRESSURE_EVENTS
2653 #if CONFIG_JETSAM
2654
2655 /*
2656 * If Jetsam is enabled, then the sending
2657 * of memory pressure notifications is handled
2658 * from the same thread that takes care of high-water
2659 * and other jetsams i.e. the memorystatus_thread.
2660 */
2661
2662 #else /* CONFIG_JETSAM */
2663
2664 vm_pressure_response();
2665
2666 #endif /* CONFIG_JETSAM */
2667 #endif /* VM_PRESSURE_EVENTS */
2668
2669 /*
2670 * do NOT set the pageout bit!
2671 * sure, we might need free pages, but this page is going to take time to become free
2672 * anyway, so we may as well put it on the clean queue first and take it from there later
2673 * if necessary. that way, we'll ensure we don't free up too much. -mj
2674 */
2675 vm_pageout_cluster(m, FALSE);
2676
2677 if (page_prev_state == PAGE_STATE_ANONYMOUS)
2678 vm_pageout_inactive_anonymous++;
2679 if (object->internal)
2680 vm_pageout_inactive_dirty_internal++;
2681 else
2682 vm_pageout_inactive_dirty_external++;
2683
2684
2685 done_with_inactivepage:
2686
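/*
 * every 'delayed_unlock_limit' pages (or right after a failed
 * attempt to take an object lock) drop the page queues lock,
 * push out any locally collected free pages and give other
 * threads a chance to grab the lock before we continue
 */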
2687 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
2688 boolean_t need_delay = TRUE;
2689
2690 if (object != NULL) {
2691 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2692 vm_object_unlock(object);
2693 object = NULL;
2694 }
2695 vm_page_unlock_queues();
2696
2697 if (local_freeq) {
2698
2699 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2700 vm_page_free_count, local_freed, delayed_unlock_limit, 4);
2701
2702 vm_page_free_list(local_freeq, TRUE);
2703
2704 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2705 vm_page_free_count, local_freed, 0, 4);
2706
2707 local_freeq = NULL;
2708 local_freed = 0;
2709 need_delay = FALSE;
2710 }
2711 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2712 vm_consider_waking_compactor_swapper();
2713 need_delay = FALSE;
2714 }
2715 vm_page_lock_queues();
2716
2717 if (need_delay == TRUE)
2718 lck_mtx_yield(&vm_page_queue_lock);
2719
2720 delayed_unlock = 1;
2721 }
2722 vm_pageout_considered_page++;
2723
2724 /*
2725 * back to top of pageout scan loop
2726 */
2727 }
2728 }
2729
2730
2731 int vm_page_free_count_init;
2732
2733 void
2734 vm_page_free_reserve(
2735 int pages)
2736 {
2737 int free_after_reserve;
2738
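/*
 * grow the free-page reserve (clipped to its limit) and then
 * recompute the minimum, target and throttle thresholds that
 * are derived from the memory remaining after the reserve
 */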
2739 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2740
2741 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
2742 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
2743 else
2744 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
2745
2746 } else {
2747 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
2748 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
2749 else
2750 vm_page_free_reserved += pages;
2751 }
2752 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
2753
2754 vm_page_free_min = vm_page_free_reserved +
2755 VM_PAGE_FREE_MIN(free_after_reserve);
2756
2757 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
2758 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
2759
2760 vm_page_free_target = vm_page_free_reserved +
2761 VM_PAGE_FREE_TARGET(free_after_reserve);
2762
2763 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
2764 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
2765
2766 if (vm_page_free_target < vm_page_free_min + 5)
2767 vm_page_free_target = vm_page_free_min + 5;
2768
2769 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3);
2770 vm_page_creation_throttle = vm_page_free_target * 3;
2771 }
2772
2773 /*
2774 * vm_pageout is the high level pageout daemon.
2775 */
2776
2777 void
2778 vm_pageout_continue(void)
2779 {
2780 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
2781 vm_pageout_scan_event_counter++;
2782
2783 vm_pageout_scan();
2784 /*
2785 * we hold both the vm_page_queue_free_lock
2786 * and the vm_page_queues_lock at this point
2787 */
2788 assert(vm_page_free_wanted == 0);
2789 assert(vm_page_free_wanted_privileged == 0);
2790 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
2791
2792 lck_mtx_unlock(&vm_page_queue_free_lock);
2793 vm_page_unlock_queues();
2794
2795 counter(c_vm_pageout_block++);
2796 thread_block((thread_continue_t)vm_pageout_continue);
2797 /*NOTREACHED*/
2798 }
2799
2800
2801 #ifdef FAKE_DEADLOCK
2802
2803 #define FAKE_COUNT 5000
2804
2805 int internal_count = 0;
2806 int fake_deadlock = 0;
2807
2808 #endif
2809
2810 static void
2811 vm_pageout_iothread_continue(struct vm_pageout_queue *q)
2812 {
2813 vm_page_t m = NULL;
2814 vm_object_t object;
2815 vm_object_offset_t offset;
2816 memory_object_t pager;
2817 thread_t self = current_thread();
2818
2819 if ((vm_pageout_internal_iothread != THREAD_NULL)
2820 && (self == vm_pageout_external_iothread )
2821 && (self->options & TH_OPT_VMPRIV))
2822 self->options &= ~TH_OPT_VMPRIV;
2823
2824 vm_page_lockspin_queues();
2825
2826 while ( !queue_empty(&q->pgo_pending) ) {
2827
2828 q->pgo_busy = TRUE;
2829 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
2830 if (m->object->object_slid) {
2831 panic("slid page %p not allowed on this path\n", m);
2832 }
2833 VM_PAGE_CHECK(m);
2834 m->pageout_queue = FALSE;
2835 m->pageq.next = NULL;
2836 m->pageq.prev = NULL;
2837
2838 /*
2839 * grab a snapshot of the object and offset this
2840 * page is tabled in so that we can relookup this
2841 * page after we've taken the object lock - these
2842 * fields are stable while we hold the page queues lock
2843 * but as soon as we drop it, there is nothing to keep
2844 * this page in this object... we hold an activity_in_progress
2845 * on this object which will keep it from terminating
2846 */
2847 object = m->object;
2848 offset = m->offset;
2849
2850 vm_page_unlock_queues();
2851
2852 #ifdef FAKE_DEADLOCK
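/*
 * test hook: after every FAKE_COUNT pages pushed through the
 * internal queue, allocate (and immediately free) enough kernel
 * memory to drain the free list, simulating a low-memory
 * deadlock for debugging
 */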
2853 if (q == &vm_pageout_queue_internal) {
2854 vm_offset_t addr;
2855 int pg_count;
2856
2857 internal_count++;
2858
2859 if ((internal_count == FAKE_COUNT)) {
2860
2861 pg_count = vm_page_free_count + vm_page_free_reserved;
2862
2863 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
2864 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
2865 }
2866 internal_count = 0;
2867 fake_deadlock++;
2868 }
2869 }
2870 #endif
2871 vm_object_lock(object);
2872
2873 m = vm_page_lookup(object, offset);
2874
2875 if (m == NULL ||
2876 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
2877 /*
2878 * it's either the same page that someone else has
2879 * started cleaning (or it's finished cleaning or
2880 * been put back on the pageout queue), or
2881 * the page has been freed or we have found a
2882 * new page at this offset... in all of these cases
2883 * we merely need to release the activity_in_progress
2884 * we took when we put the page on the pageout queue
2885 */
2886 vm_object_activity_end(object);
2887 vm_object_unlock(object);
2888
2889 vm_page_lockspin_queues();
2890 continue;
2891 }
2892 if (!object->pager_initialized) {
2893
2894 /*
2895 * If there is no memory object for the page, create
2896 * one and hand it to the default pager.
2897 */
2898
2899 if (!object->pager_initialized)
2900 vm_object_collapse(object,
2901 (vm_object_offset_t) 0,
2902 TRUE);
2903 if (!object->pager_initialized)
2904 vm_object_pager_create(object);
2905 if (!object->pager_initialized) {
2906 /*
2907 * Still no pager for the object.
2908 * Reactivate the page.
2909 *
2910 * Should only happen if there is no
2911 * default pager.
2912 */
2913 m->pageout = FALSE;
2914
2915 vm_page_lockspin_queues();
2916
2917 vm_pageout_throttle_up(m);
2918 vm_page_activate(m);
2919 vm_pageout_dirty_no_pager++;
2920
2921 vm_page_unlock_queues();
2922
2923 /*
2924 * And we are done with it.
2925 */
2926 vm_object_activity_end(object);
2927 vm_object_unlock(object);
2928
2929 vm_page_lockspin_queues();
2930 continue;
2931 }
2932 }
2933 pager = object->pager;
2934
2935 if (pager == MEMORY_OBJECT_NULL) {
2936 /*
2937 * This pager has been destroyed by either
2938 * memory_object_destroy or vm_object_destroy, and
2939 * so there is nowhere for the page to go.
2940 */
2941 if (m->pageout) {
2942 /*
2943 * Just free the page... VM_PAGE_FREE takes
2944 * care of cleaning up all the state...
2945 * including doing the vm_pageout_throttle_up
2946 */
2947 VM_PAGE_FREE(m);
2948 } else {
2949 vm_page_lockspin_queues();
2950
2951 vm_pageout_throttle_up(m);
2952 vm_page_activate(m);
2953
2954 vm_page_unlock_queues();
2955
2956 /*
2957 * And we are done with it.
2958 */
2959 }
2960 vm_object_activity_end(object);
2961 vm_object_unlock(object);
2962
2963 vm_page_lockspin_queues();
2964 continue;
2965 }
2966 #if 0
2967 /*
2968 * we don't hold the page queue lock
2969 * so this check isn't safe to make
2970 */
2971 VM_PAGE_CHECK(m);
2972 #endif
2973 /*
2974 * give back the activity_in_progress reference we
2975 * took when we queued up this page and replace it
2976 * with a paging_in_progress reference that will
2977 * also keep the paging offset from changing and
2978 * prevent the object from terminating
2979 */
2980 vm_object_activity_end(object);
2981 vm_object_paging_begin(object);
2982 vm_object_unlock(object);
2983
2984 /*
2985 * Send the data to the pager.
2986 * any pageout clustering happens there
2987 */
2988 memory_object_data_return(pager,
2989 m->offset + object->paging_offset,
2990 PAGE_SIZE,
2991 NULL,
2992 NULL,
2993 FALSE,
2994 FALSE,
2995 0);
2996
2997 vm_object_lock(object);
2998 vm_object_paging_end(object);
2999 vm_object_unlock(object);
3000
3001 vm_pageout_io_throttle();
3002
3003 vm_page_lockspin_queues();
3004 }
3005 q->pgo_busy = FALSE;
3006 q->pgo_idle = TRUE;
3007
3008 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3009 vm_page_unlock_queues();
3010
3011 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q);
3012 /*NOTREACHED*/
3013 }
3014
3015
3016 static void
3017 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3018 {
3019 vm_page_t m = NULL;
3020 vm_object_t object;
3021 vm_object_offset_t offset;
3022 memory_object_t pager;
3023
3024
3025 if (vm_pageout_internal_iothread != THREAD_NULL)
3026 current_thread()->options &= ~TH_OPT_VMPRIV;
3027
3028 vm_page_lockspin_queues();
3029
3030 while ( !queue_empty(&q->pgo_pending) ) {
3031
3032 q->pgo_busy = TRUE;
3033 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3034 if (m->object->object_slid) {
3035 panic("slid page %p not allowed on this path\n", m);
3036 }
3037 VM_PAGE_CHECK(m);
3038 m->pageout_queue = FALSE;
3039 m->pageq.next = NULL;
3040 m->pageq.prev = NULL;
3041
3042 /*
3043 * grab a snapshot of the object and offset this
3044 * page is tabled in so that we can relookup this
3045 * page after we've taken the object lock - these
3046 * fields are stable while we hold the page queues lock
3047 * but as soon as we drop it, there is nothing to keep
3048 * this page in this object... we hold an activity_in_progress
3049 * on this object which will keep it from terminating
3050 */
3051 object = m->object;
3052 offset = m->offset;
3053
3054 vm_page_unlock_queues();
3055
3056 vm_object_lock(object);
3057
3058 m = vm_page_lookup(object, offset);
3059
3060 if (m == NULL ||
3061 m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
3062 /*
3063 * it's either the same page that someone else has
3064 * started cleaning (or it's finished cleaning or
3065 * been put back on the pageout queue), or
3066 * the page has been freed or we have found a
3067 * new page at this offset... in all of these cases
3068 * we merely need to release the activity_in_progress
3069 * we took when we put the page on the pageout queue
3070 */
3071 vm_object_activity_end(object);
3072 vm_object_unlock(object);
3073
3074 vm_page_lockspin_queues();
3075 continue;
3076 }
3077 pager = object->pager;
3078
3079 if (pager == MEMORY_OBJECT_NULL) {
3080 /*
3081 * This pager has been destroyed by either
3082 * memory_object_destroy or vm_object_destroy, and
3083 * so there is nowhere for the page to go.
3084 */
3085 if (m->pageout) {
3086 /*
3087 * Just free the page... VM_PAGE_FREE takes
3088 * care of cleaning up all the state...
3089 * including doing the vm_pageout_throttle_up
3090 */
3091 VM_PAGE_FREE(m);
3092 } else {
3093 vm_page_lockspin_queues();
3094
3095 vm_pageout_throttle_up(m);
3096 vm_page_activate(m);
3097
3098 vm_page_unlock_queues();
3099
3100 /*
3101 * And we are done with it.
3102 */
3103 }
3104 vm_object_activity_end(object);
3105 vm_object_unlock(object);
3106
3107 vm_page_lockspin_queues();
3108 continue;
3109 }
3110 #if 0
3111 /*
3112 * we don't hold the page queue lock
3113 * so this check isn't safe to make
3114 */
3115 VM_PAGE_CHECK(m);
3116 #endif
3117 /*
3118 * give back the activity_in_progress reference we
3119 * took when we queued up this page and replace it
3120 * with a paging_in_progress reference that will
3121 * also keep the paging offset from changing and
3122 * prevent the object from terminating
3123 */
3124 vm_object_activity_end(object);
3125 vm_object_paging_begin(object);
3126 vm_object_unlock(object);
3127
3128 /*
3129 * Send the data to the pager.
3130 * any pageout clustering happens there
3131 */
3132 memory_object_data_return(pager,
3133 m->offset + object->paging_offset,
3134 PAGE_SIZE,
3135 NULL,
3136 NULL,
3137 FALSE,
3138 FALSE,
3139 0);
3140
3141 vm_object_lock(object);
3142 vm_object_paging_end(object);
3143 vm_object_unlock(object);
3144
3145 vm_pageout_io_throttle();
3146
3147 vm_page_lockspin_queues();
3148 }
3149 q->pgo_busy = FALSE;
3150 q->pgo_idle = TRUE;
3151
3152 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3153 vm_page_unlock_queues();
3154
3155 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3156 /*NOTREACHED*/
3157 }
3158
3159
3160 uint32_t vm_compressor_failed;
3161
3162 static void
3163 vm_pageout_iothread_internal_continue(struct cq *cq)
3164 {
3165 struct vm_pageout_queue *q;
3166 vm_page_t m = NULL;
3167 vm_object_t object;
3168 memory_object_t pager;
3169 boolean_t pgo_draining;
3170 vm_page_t local_q;
3171 int local_cnt;
3172 vm_page_t local_freeq = NULL;
3173 int local_freed = 0;
3174 int local_batch_size;
3175 kern_return_t retval;
3176 int compressed_count_delta;
3177
3178
3179 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3180
3181 q = cq->q;
3182 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4);
3183
3184 while (TRUE) {
3185
3186 local_cnt = 0;
3187 local_q = NULL;
3188
3189 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3190
3191 vm_page_lock_queues();
3192
3193 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3194
3195 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3196
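/*
 * pull up to 'local_batch_size' pages off the pageout queue
 * onto a local list so they can be compressed without holding
 * the page queues lock
 */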
3197 while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3198
3199 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3200
3201 VM_PAGE_CHECK(m);
3202
3203 m->pageout_queue = FALSE;
3204 m->pageq.prev = NULL;
3205
3206 m->pageq.next = (queue_entry_t)local_q;
3207 local_q = m;
3208 local_cnt++;
3209 }
3210 if (local_q == NULL)
3211 break;
3212
3213 q->pgo_busy = TRUE;
3214
3215 if ((pgo_draining = q->pgo_draining) == FALSE)
3216 vm_pageout_throttle_up_batch(q, local_cnt);
3217
3218 vm_page_unlock_queues();
3219
3220 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3221
3222 while (local_q) {
3223
3224 m = local_q;
3225 local_q = (vm_page_t)m->pageq.next;
3226 m->pageq.next = NULL;
3227
3228 if (m->object->object_slid) {
3229 panic("slid page %p not allowed on this path\n", m);
3230 }
3231
3232 object = m->object;
3233 pager = object->pager;
3234
3235 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3236
3237 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3238
3239 vm_object_lock(object);
3240
3241 /*
3242 * If there is no memory object for the page, create
3243 * one and hand it to the compression pager.
3244 */
3245
3246 if (!object->pager_initialized)
3247 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3248 if (!object->pager_initialized)
3249 vm_object_compressor_pager_create(object);
3250
3251 if (!object->pager_initialized) {
3252 /*
3253 * Still no pager for the object.
3254 * Reactivate the page.
3255 *
3256 * Should only happen if there is no
3257 * compression pager
3258 */
3259 m->pageout = FALSE;
3260 m->laundry = FALSE;
3261 PAGE_WAKEUP_DONE(m);
3262
3263 vm_page_lockspin_queues();
3264 vm_page_activate(m);
3265 vm_pageout_dirty_no_pager++;
3266 vm_page_unlock_queues();
3267
3268 /*
3269 * And we are done with it.
3270 */
3271 vm_object_activity_end(object);
3272 vm_object_unlock(object);
3273
3274 continue;
3275 }
3276 pager = object->pager;
3277
3278 if (pager == MEMORY_OBJECT_NULL) {
3279 /*
3280 * This pager has been destroyed by either
3281 * memory_object_destroy or vm_object_destroy, and
3282 * so there is nowhere for the page to go.
3283 */
3284 if (m->pageout) {
3285 /*
3286 * Just free the page... VM_PAGE_FREE takes
3287 * care of cleaning up all the state...
3288 * including doing the vm_pageout_throttle_up
3289 */
3290 VM_PAGE_FREE(m);
3291 } else {
3292 m->laundry = FALSE;
3293 PAGE_WAKEUP_DONE(m);
3294
3295 vm_page_lockspin_queues();
3296 vm_page_activate(m);
3297 vm_page_unlock_queues();
3298
3299 /*
3300 * And we are done with it.
3301 */
3302 }
3303 vm_object_activity_end(object);
3304 vm_object_unlock(object);
3305
3306 continue;
3307 }
3308 vm_object_unlock(object);
3309
3310 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3311 }
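/*
 * if the free list has dipped below the portion of the reserve
 * that we're allowed to consume, give back any pages freed by
 * compression so far, or wait as a privileged waiter for the
 * free count to recover
 */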
3312 while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) {
3313 kern_return_t wait_result;
3314 int need_wakeup = 0;
3315
3316 if (local_freeq) {
3317 vm_page_free_list(local_freeq, TRUE);
3318
3319 local_freeq = NULL;
3320 local_freed = 0;
3321
3322 continue;
3323 }
3324 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3325
3326 if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) {
3327
3328 if (vm_page_free_wanted_privileged++ == 0)
3329 need_wakeup = 1;
3330 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3331
3332 lck_mtx_unlock(&vm_page_queue_free_lock);
3333
3334 if (need_wakeup)
3335 thread_wakeup((event_t)&vm_page_free_wanted);
3336
3337 if (wait_result == THREAD_WAITING)
3338 thread_block(THREAD_CONTINUE_NULL);
3339 } else
3340 lck_mtx_unlock(&vm_page_queue_free_lock);
3341 }
3342
3343 assert(object->activity_in_progress > 0);
3344
3345 retval = vm_compressor_pager_put(
3346 pager,
3347 m->offset + object->paging_offset,
3348 m->phys_page,
3349 &cq->current_chead,
3350 cq->scratch_buf,
3351 &compressed_count_delta);
3352
3353 vm_object_lock(object);
3354 assert(object->activity_in_progress > 0);
3355
3356 assert(m->object == object);
3357
3358 vm_compressor_pager_count(pager,
3359 compressed_count_delta,
3360 FALSE, /* shared_lock */
3361 object);
3362
3363 m->laundry = FALSE;
3364 m->pageout = FALSE;
3365
3366 if (retval == KERN_SUCCESS) {
3367 /*
3368 * If the object is purgeable, its owner's
3369 * purgeable ledgers will be updated in
3370 * vm_page_remove() but the page still
3371 * contributes to the owner's memory footprint,
3372 * so account for it as such.
3373 */
3374 if (object->purgable != VM_PURGABLE_DENY &&
3375 object->vo_purgeable_owner != NULL) {
3376 /* one more compressed purgeable page */
3377 vm_purgeable_compressed_update(object,
3378 +1);
3379 }
3380
3381 vm_page_compressions_failing = FALSE;
3382
3383 VM_STAT_INCR(compressions);
3384
3385 if (m->tabled)
3386 vm_page_remove(m, TRUE);
3387 vm_object_activity_end(object);
3388 vm_object_unlock(object);
3389
3390 m->pageq.next = (queue_entry_t)local_freeq;
3391 local_freeq = m;
3392 local_freed++;
3393
3394 } else {
3395 PAGE_WAKEUP_DONE(m);
3396
3397 vm_page_lockspin_queues();
3398
3399 vm_page_activate(m);
3400 vm_compressor_failed++;
3401
3402 vm_page_compressions_failing = TRUE;
3403
3404 vm_page_unlock_queues();
3405
3406 vm_object_activity_end(object);
3407 vm_object_unlock(object);
3408 }
3409 }
3410 if (local_freeq) {
3411 vm_page_free_list(local_freeq, TRUE);
3412
3413 local_freeq = NULL;
3414 local_freed = 0;
3415 }
3416 if (pgo_draining == TRUE) {
3417 vm_page_lockspin_queues();
3418 vm_pageout_throttle_up_batch(q, local_cnt);
3419 vm_page_unlock_queues();
3420 }
3421 }
3422 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3423
3424 /*
3425 * queue lock is held and our q is empty
3426 */
3427 q->pgo_busy = FALSE;
3428 q->pgo_idle = TRUE;
3429
3430 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3431 vm_page_unlock_queues();
3432
3433 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3434
3435 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3436 /*NOTREACHED*/
3437 }
3438
3439
3440
3441 static void
3442 vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3443 {
3444 uint32_t policy;
3445 boolean_t set_iq = FALSE;
3446 boolean_t set_eq = FALSE;
3447
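/*
 * switch the internal and/or external pageout threads between
 * throttled and unthrottled I/O policy as requested... while
 * hibernation cleanup is in progress the threads are always
 * left unthrottled
 */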
3448 if (hibernate_cleaning_in_progress == TRUE)
3449 req_lowpriority = FALSE;
3450
3451 if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority)
3452 set_iq = TRUE;
3453
3454 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
3455 set_eq = TRUE;
3456
3457 if (set_iq == TRUE || set_eq == TRUE) {
3458
3459 vm_page_unlock_queues();
3460
3461 if (req_lowpriority == TRUE) {
3462 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3463 DTRACE_VM(laundrythrottle);
3464 } else {
3465 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3466 DTRACE_VM(laundryunthrottle);
3467 }
3468 if (set_iq == TRUE) {
3469 proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3470
3471 iq->pgo_lowpriority = req_lowpriority;
3472 }
3473 if (set_eq == TRUE) {
3474 proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3475
3476 eq->pgo_lowpriority = req_lowpriority;
3477 }
3478 vm_page_lock_queues();
3479 }
3480 }
3481
3482
3483 static void
3484 vm_pageout_iothread_external(void)
3485 {
3486 thread_t self = current_thread();
3487
3488 self->options |= TH_OPT_VMPRIV;
3489
3490 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3491
3492 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3493 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3494
3495 vm_page_lock_queues();
3496
3497 vm_pageout_queue_external.pgo_tid = self->thread_id;
3498 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3499 vm_pageout_queue_external.pgo_inited = TRUE;
3500
3501 vm_page_unlock_queues();
3502
3503 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
3504 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3505 else
3506 vm_pageout_iothread_continue(&vm_pageout_queue_external);
3507
3508 /*NOTREACHED*/
3509 }
3510
3511
3512 static void
3513 vm_pageout_iothread_internal(struct cq *cq)
3514 {
3515 thread_t self = current_thread();
3516
3517 self->options |= TH_OPT_VMPRIV;
3518
3519 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
3520 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3521
3522 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
3523 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3524 }
3525 vm_page_lock_queues();
3526
3527 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3528 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3529 vm_pageout_queue_internal.pgo_inited = TRUE;
3530
3531 vm_page_unlock_queues();
3532
3533 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
3534 cq->q = &vm_pageout_queue_internal;
3535 cq->current_chead = NULL;
3536 cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
3537
3538 vm_pageout_iothread_internal_continue(cq);
3539 } else
3540 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
3541
3542 /*NOTREACHED*/
3543 }
3544
3545 kern_return_t
3546 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3547 {
3548 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3549 return KERN_SUCCESS;
3550 } else {
3551 return KERN_FAILURE; /* Already set */
3552 }
3553 }
3554
3555 extern boolean_t memorystatus_manual_testing_on;
3556 extern unsigned int memorystatus_level;
3557
3558
3559 #if VM_PRESSURE_EVENTS
3560
3561 boolean_t vm_pressure_events_enabled = FALSE;
3562
3563 void
3564 vm_pressure_response(void)
3565 {
3566
3567 vm_pressure_level_t old_level = kVMPressureNormal;
3568 int new_level = -1;
3569
3570 uint64_t available_memory = 0;
3571
3572 if (vm_pressure_events_enabled == FALSE)
3573 return;
3574
3575
3576 available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100);
3577
3578
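/*
 * available_memory was scaled by 100 above, so dividing by the
 * total number of physical pages yields the percentage of
 * memory still available to the VM system
 */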
3579 memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem));
3580
3581 if (memorystatus_manual_testing_on) {
3582 return;
3583 }
3584
3585 old_level = memorystatus_vm_pressure_level;
3586
3587 switch (memorystatus_vm_pressure_level) {
3588
3589 case kVMPressureNormal:
3590 {
3591 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3592 new_level = kVMPressureCritical;
3593 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3594 new_level = kVMPressureWarning;
3595 }
3596 break;
3597 }
3598
3599 case kVMPressureWarning:
3600 case kVMPressureUrgent:
3601 {
3602 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3603 new_level = kVMPressureNormal;
3604 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3605 new_level = kVMPressureCritical;
3606 }
3607 break;
3608 }
3609
3610 case kVMPressureCritical:
3611 {
3612 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3613 new_level = kVMPressureNormal;
3614 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
3615 new_level = kVMPressureWarning;
3616 }
3617 break;
3618 }
3619
3620 default:
3621 return;
3622 }
3623
3624 if (new_level != -1) {
3625 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
3626
3627 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
3628 if (vm_pressure_thread_running == FALSE) {
3629 thread_wakeup(&vm_pressure_thread);
3630 }
3631
3632 if (old_level != new_level) {
3633 thread_wakeup(&vm_pressure_changed);
3634 }
3635 }
3636 }
3637
3638 }
3639 #endif /* VM_PRESSURE_EVENTS */
3640
3641 kern_return_t
3642 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
3643
3644 #if !VM_PRESSURE_EVENTS
3645
3646 return KERN_FAILURE;
3647
3648 #else /* VM_PRESSURE_EVENTS */
3649
3650 kern_return_t kr = KERN_SUCCESS;
3651
3652 if (pressure_level != NULL) {
3653
3654 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
3655
3656 if (wait_for_pressure == TRUE) {
3657 wait_result_t wr = 0;
3658
3659 while (old_level == *pressure_level) {
3660 wr = assert_wait((event_t) &vm_pressure_changed,
3661 THREAD_INTERRUPTIBLE);
3662 if (wr == THREAD_WAITING) {
3663 wr = thread_block(THREAD_CONTINUE_NULL);
3664 }
3665 if (wr == THREAD_INTERRUPTED) {
3666 return KERN_ABORTED;
3667 }
3668 if (wr == THREAD_AWAKENED) {
3669
3670 old_level = memorystatus_vm_pressure_level;
3671
3672 if (old_level != *pressure_level) {
3673 break;
3674 }
3675 }
3676 }
3677 }
3678
3679 *pressure_level = old_level;
3680 kr = KERN_SUCCESS;
3681 } else {
3682 kr = KERN_INVALID_ARGUMENT;
3683 }
3684
3685 return kr;
3686 #endif /* VM_PRESSURE_EVENTS */
3687 }
3688
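/*
 * A minimal caller-side sketch of the blocking mode of
 * mach_vm_pressure_level_monitor() above.  Not compiled; the wrapper name
 * pressure_watch_example() is a hypothetical placeholder.
 */
#if 0
static void
pressure_watch_example(void)
{
	unsigned int	level = kVMPressureNormal;
	kern_return_t	kr;

	/* seed "level" with the current pressure level... */
	kr = mach_vm_pressure_level_monitor(FALSE, &level);

	/* ...then block until the level changes (or the wait is interrupted) */
	if (kr == KERN_SUCCESS)
		kr = mach_vm_pressure_level_monitor(TRUE, &level);

	if (kr == KERN_SUCCESS) {
		/* "level" now holds the new vm_pressure_level_t value */
	}
}
#endif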
3689 #if VM_PRESSURE_EVENTS
3690 void
3691 vm_pressure_thread(void) {
3692 static boolean_t thread_initialized = FALSE;
3693
3694 if (thread_initialized == TRUE) {
3695 vm_pressure_thread_running = TRUE;
3696 consider_vm_pressure_events();
3697 vm_pressure_thread_running = FALSE;
3698 }
3699
3700 thread_initialized = TRUE;
3701 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
3702 thread_block((thread_continue_t)vm_pressure_thread);
3703 }
3704 #endif /* VM_PRESSURE_EVENTS */
3705
3706
3707 uint32_t vm_pageout_considered_page_last = 0;
3708
3709 /*
3710 * called once per second via "compute_averages"
3711 */
3712 void
3713 compute_pageout_gc_throttle()
3714 {
3715 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
3716
3717 vm_pageout_considered_page_last = vm_pageout_considered_page;
3718
3719 thread_wakeup((event_t) &vm_pageout_garbage_collect);
3720 }
3721 }
3722
3723
3724 static void
3725 vm_pageout_garbage_collect(int collect)
3726 {
3727
3728 if (collect) {
3729 boolean_t buf_large_zfree = FALSE;
3730 boolean_t first_try = TRUE;
3731
3732 stack_collect();
3733
3734 consider_machine_collect();
3735 m_drain();
3736
3737 do {
3738 if (consider_buffer_cache_collect != NULL) {
3739 buf_large_zfree = (*consider_buffer_cache_collect)(0);
3740 }
3741 if (first_try == TRUE || buf_large_zfree == TRUE) {
3742 /*
3743 * consider_zone_gc should be last, because the other operations
3744 * might return memory to zones.
3745 */
3746 consider_zone_gc(buf_large_zfree);
3747 }
3748 first_try = FALSE;
3749
3750 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
3751
3752 consider_machine_adjust();
3753 }
3754 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
3755
3756 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
3757 /*NOTREACHED*/
3758 }
3759
3760
3761 void vm_pageout_reinit_tuneables(void);
3762
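/*
 * Reset the paging/compressor tuneables: the filecache floor becomes 1/15th
 * of physical memory (e.g. on an 8GB machine with 4KB pages that is
 * 2,097,152 / 15 ~= 139,810 pages, roughly 546MB), bounded below by
 * VM_PAGE_FILECACHE_MIN, and the compactor/unthrottle divisors are set to
 * 18/22/32.
 */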
3763 void
3764 vm_pageout_reinit_tuneables(void)
3765 {
3766 vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 15;
3767
3768 if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN)
3769 vm_page_filecache_min = VM_PAGE_FILECACHE_MIN;
3770
3771 vm_compressor_minorcompact_threshold_divisor = 18;
3772 vm_compressor_majorcompact_threshold_divisor = 22;
3773 vm_compressor_unthrottle_threshold_divisor = 32;
3774 }
3775
3776
3777 #if VM_PAGE_BUCKETS_CHECK
3778 #if VM_PAGE_FAKE_BUCKETS
3779 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
3780 #endif /* VM_PAGE_FAKE_BUCKETS */
3781 #endif /* VM_PAGE_BUCKETS_CHECK */
3782
3783 #define FBDP_TEST_COLLAPSE_COMPRESSOR 0
3784 #if FBDP_TEST_COLLAPSE_COMPRESSOR
3785 extern boolean_t vm_object_collapse_compressor_allowed;
3786 #include <IOKit/IOLib.h>
3787 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
3788
3789 #define FBDP_TEST_WIRE_AND_EXTRACT 0
3790 #if FBDP_TEST_WIRE_AND_EXTRACT
3791 extern ledger_template_t task_ledger_template;
3792 #include <mach/mach_vm.h>
3793 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
3794 vm_offset_t offset);
3795 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
3796
3797 void
3798 vm_pageout(void)
3799 {
3800 thread_t self = current_thread();
3801 thread_t thread;
3802 kern_return_t result;
3803 spl_t s;
3804
3805 /*
3806 * Set thread privileges.
3807 */
3808 s = splsched();
3809 thread_lock(self);
3810 self->priority = BASEPRI_PREEMPT - 1;
3811 set_sched_pri(self, self->priority);
3812 thread_unlock(self);
3813
3814 if (!self->reserved_stack)
3815 self->reserved_stack = self->kernel_stack;
3816
3817 splx(s);
3818
3819 /*
3820 * Initialize some paging parameters.
3821 */
3822
3823 if (vm_pageout_swap_wait == 0)
3824 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
3825
3826 if (vm_pageout_idle_wait == 0)
3827 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
3828
3829 if (vm_pageout_burst_wait == 0)
3830 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
3831
3832 if (vm_pageout_empty_wait == 0)
3833 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
3834
3835 if (vm_pageout_deadlock_wait == 0)
3836 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
3837
3838 if (vm_pageout_deadlock_relief == 0)
3839 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
3840
3841 if (vm_pageout_inactive_relief == 0)
3842 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
3843
3844 if (vm_pageout_burst_active_throttle == 0)
3845 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
3846
3847 if (vm_pageout_burst_inactive_throttle == 0)
3848 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
3849
3850 #if !CONFIG_JETSAM
3851 vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 20;
3852 if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN)
3853 vm_page_filecache_min = VM_PAGE_FILECACHE_MIN;
3854 #endif
3855
3856 /*
3857 * Set kernel task to low backing store privileged
3858 * status
3859 */
3860 task_lock(kernel_task);
3861 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
3862 task_unlock(kernel_task);
3863
3864 vm_page_free_count_init = vm_page_free_count;
3865
3866 /*
3867 * even if we've already called vm_page_free_reserve
3868 * call it again here to ensure that the targets are
3869 * accurately calculated (it uses vm_page_free_count_init)
3870 * calling it with an arg of 0 will not change the reserve
3871 * but will re-calculate free_min and free_target
3872 */
3873 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
3874 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
3875 } else
3876 vm_page_free_reserve(0);
3877
3878
3879 queue_init(&vm_pageout_queue_external.pgo_pending);
3880 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
3881 vm_pageout_queue_external.pgo_laundry = 0;
3882 vm_pageout_queue_external.pgo_idle = FALSE;
3883 vm_pageout_queue_external.pgo_busy = FALSE;
3884 vm_pageout_queue_external.pgo_throttled = FALSE;
3885 vm_pageout_queue_external.pgo_draining = FALSE;
3886 vm_pageout_queue_external.pgo_lowpriority = FALSE;
3887 vm_pageout_queue_external.pgo_tid = -1;
3888 vm_pageout_queue_external.pgo_inited = FALSE;
3889
3890
3891 queue_init(&vm_pageout_queue_internal.pgo_pending);
3892 vm_pageout_queue_internal.pgo_maxlaundry = 0;
3893 vm_pageout_queue_internal.pgo_laundry = 0;
3894 vm_pageout_queue_internal.pgo_idle = FALSE;
3895 vm_pageout_queue_internal.pgo_busy = FALSE;
3896 vm_pageout_queue_internal.pgo_throttled = FALSE;
3897 vm_pageout_queue_internal.pgo_draining = FALSE;
3898 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
3899 vm_pageout_queue_internal.pgo_tid = -1;
3900 vm_pageout_queue_internal.pgo_inited = FALSE;
3901
3902 /* internal pageout thread started when the default pager is registered for the first time */
3903 /* external pageout and garbage collection threads started here */
3904
3905 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
3906 BASEPRI_PREEMPT - 1,
3907 &vm_pageout_external_iothread);
3908 if (result != KERN_SUCCESS)
3909 panic("vm_pageout_iothread_external: create failed");
3910
3911 thread_deallocate(vm_pageout_external_iothread);
3912
3913 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
3914 BASEPRI_DEFAULT,
3915 &thread);
3916 if (result != KERN_SUCCESS)
3917 panic("vm_pageout_garbage_collect: create failed");
3918
3919 thread_deallocate(thread);
3920
3921 #if VM_PRESSURE_EVENTS
3922 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
3923 BASEPRI_DEFAULT,
3924 &thread);
3925
3926 if (result != KERN_SUCCESS)
3927 panic("vm_pressure_thread: create failed");
3928
3929 thread_deallocate(thread);
3930 #endif
3931
3932 vm_object_reaper_init();
3933
3934 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
3935 vm_compressor_pager_init();
3936
3937 #if VM_PRESSURE_EVENTS
3938 vm_pressure_events_enabled = TRUE;
3939 #endif /* VM_PRESSURE_EVENTS */
3940
3941 #if CONFIG_PHANTOM_CACHE
3942 vm_phantom_cache_init();
3943 #endif
3944 #if VM_PAGE_BUCKETS_CHECK
3945 #if VM_PAGE_FAKE_BUCKETS
3946 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
3947 (uint64_t) vm_page_fake_buckets_start,
3948 (uint64_t) vm_page_fake_buckets_end);
3949 pmap_protect(kernel_pmap,
3950 vm_page_fake_buckets_start,
3951 vm_page_fake_buckets_end,
3952 VM_PROT_READ);
3953 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
3954 #endif /* VM_PAGE_FAKE_BUCKETS */
3955 #endif /* VM_PAGE_BUCKETS_CHECK */
3956
3957 #if VM_OBJECT_TRACKING
3958 vm_object_tracking_init();
3959 #endif /* VM_OBJECT_TRACKING */
3960
3961
3962 #if FBDP_TEST_COLLAPSE_COMPRESSOR
3963 vm_object_size_t backing_size, top_size;
3964 vm_object_t backing_object, top_object;
3965 vm_map_offset_t backing_offset, top_offset;
3966 unsigned char *backing_address, *top_address;
3967 kern_return_t kr;
3968
3969 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n");
3970
3971 /* create backing object */
3972 backing_size = 15 * PAGE_SIZE;
3973 backing_object = vm_object_allocate(backing_size);
3974 assert(backing_object != VM_OBJECT_NULL);
3975 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
3976 backing_object);
3977 /* map backing object */
3978 backing_offset = 0;
3979 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
3980 VM_FLAGS_ANYWHERE, backing_object, 0, FALSE,
3981 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
3982 assert(kr == KERN_SUCCESS);
3983 backing_address = (unsigned char *) backing_offset;
3984 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
3985 "mapped backing object %p at 0x%llx\n",
3986 backing_object, (uint64_t) backing_offset);
3987 /* populate with pages to be compressed in backing object */
3988 backing_address[0x1*PAGE_SIZE] = 0xB1;
3989 backing_address[0x4*PAGE_SIZE] = 0xB4;
3990 backing_address[0x7*PAGE_SIZE] = 0xB7;
3991 backing_address[0xa*PAGE_SIZE] = 0xBA;
3992 backing_address[0xd*PAGE_SIZE] = 0xBD;
3993 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
3994 "populated pages to be compressed in "
3995 "backing_object %p\n", backing_object);
3996 /* compress backing object */
3997 vm_object_pageout(backing_object);
3998 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
3999 backing_object);
4000 /* wait for all the pages to be gone */
4001 while (*(volatile int *)&backing_object->resident_page_count != 0)
4002 IODelay(10);
4003 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
4004 backing_object);
4005 /* populate with pages to be resident in backing object */
4006 backing_address[0x0*PAGE_SIZE] = 0xB0;
4007 backing_address[0x3*PAGE_SIZE] = 0xB3;
4008 backing_address[0x6*PAGE_SIZE] = 0xB6;
4009 backing_address[0x9*PAGE_SIZE] = 0xB9;
4010 backing_address[0xc*PAGE_SIZE] = 0xBC;
4011 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4012 "populated pages to be resident in "
4013 "backing_object %p\n", backing_object);
4014 /* leave the other pages absent */
4015 /* mess with the paging_offset of the backing_object */
4016 assert(backing_object->paging_offset == 0);
4017 backing_object->paging_offset = 0x3000;
4018
4019 /* create top object */
4020 top_size = 9 * PAGE_SIZE;
4021 top_object = vm_object_allocate(top_size);
4022 assert(top_object != VM_OBJECT_NULL);
4023 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
4024 top_object);
4025 /* map top object */
4026 top_offset = 0;
4027 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
4028 VM_FLAGS_ANYWHERE, top_object, 0, FALSE,
4029 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4030 assert(kr == KERN_SUCCESS);
4031 top_address = (unsigned char *) top_offset;
4032 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4033 "mapped top object %p at 0x%llx\n",
4034 top_object, (uint64_t) top_offset);
4035 /* populate with pages to be compressed in top object */
4036 top_address[0x3*PAGE_SIZE] = 0xA3;
4037 top_address[0x4*PAGE_SIZE] = 0xA4;
4038 top_address[0x5*PAGE_SIZE] = 0xA5;
4039 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4040 "populated pages to be compressed in "
4041 "top_object %p\n", top_object);
4042 /* compress top object */
4043 vm_object_pageout(top_object);
4044 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
4045 top_object);
4046 /* wait for all the pages to be gone */
4047 while (*(volatile int *)&top_object->resident_page_count != 0) IODelay(10);
4048 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
4049 top_object);
4050 /* populate with pages to be resident in top object */
4051 top_address[0x0*PAGE_SIZE] = 0xA0;
4052 top_address[0x1*PAGE_SIZE] = 0xA1;
4053 top_address[0x2*PAGE_SIZE] = 0xA2;
4054 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4055 "populated pages to be resident in "
4056 "top_object %p\n", top_object);
4057 /* leave the other pages absent */
4058
4059 /* link the 2 objects */
4060 vm_object_reference(backing_object);
4061 top_object->shadow = backing_object;
4062 top_object->vo_shadow_offset = 0x3000;
4063 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
4064 top_object, backing_object);
4065
4066 /* unmap backing object */
4067 vm_map_remove(kernel_map,
4068 backing_offset,
4069 backing_offset + backing_size,
4070 0);
4071 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4072 "unmapped backing_object %p [0x%llx:0x%llx]\n",
4073 backing_object,
4074 (uint64_t) backing_offset,
4075 (uint64_t) (backing_offset + backing_size));
4076
4077 /* collapse */
4078 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
4079 vm_object_lock(top_object);
4080 vm_object_collapse(top_object, 0, FALSE);
4081 vm_object_unlock(top_object);
4082 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
4083
4084 /* did it work? */
4085 if (top_object->shadow != VM_OBJECT_NULL) {
4086 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
4087 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4088 if (vm_object_collapse_compressor_allowed) {
4089 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4090 }
4091 } else {
4092 /* check the contents of the mapping */
4093 unsigned char expect[9] =
4094 { 0xA0, 0xA1, 0xA2, /* resident in top */
4095 0xA3, 0xA4, 0xA5, /* compressed in top */
4096 0xB9, /* resident in backing + shadow_offset */
4097 0xBD, /* compressed in backing + shadow_offset + paging_offset */
4098 0x00 }; /* absent in both */
4099 unsigned char actual[9];
4100 unsigned int i, errors;
4101
4102 errors = 0;
4103 for (i = 0; i < sizeof (actual); i++) {
4104 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
4105 if (actual[i] != expect[i]) {
4106 errors++;
4107 }
4108 }
4109 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4110 "actual [%x %x %x %x %x %x %x %x %x] "
4111 "expect [%x %x %x %x %x %x %x %x %x] "
4112 "%d errors\n",
4113 actual[0], actual[1], actual[2], actual[3],
4114 actual[4], actual[5], actual[6], actual[7],
4115 actual[8],
4116 expect[0], expect[1], expect[2], expect[3],
4117 expect[4], expect[5], expect[6], expect[7],
4118 expect[8],
4119 errors);
4120 if (errors) {
4121 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4122 } else {
4123 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n");
4124 }
4125 }
4126 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4127
4128 #if FBDP_TEST_WIRE_AND_EXTRACT
4129 ledger_t ledger;
4130 vm_map_t user_map, wire_map;
4131 mach_vm_address_t user_addr, wire_addr;
4132 mach_vm_size_t user_size, wire_size;
4133 mach_vm_offset_t cur_offset;
4134 vm_prot_t cur_prot, max_prot;
4135 ppnum_t user_ppnum, wire_ppnum;
4136 kern_return_t kr;
4137
4138 ledger = ledger_instantiate(task_ledger_template,
4139 LEDGER_CREATE_ACTIVE_ENTRIES);
4140 user_map = vm_map_create(pmap_create(ledger, 0, TRUE),
4141 0x100000000ULL,
4142 0x200000000ULL,
4143 TRUE);
4144 wire_map = vm_map_create(NULL,
4145 0x100000000ULL,
4146 0x200000000ULL,
4147 TRUE);
4148 user_addr = 0;
4149 user_size = 0x10000;
4150 kr = mach_vm_allocate(user_map,
4151 &user_addr,
4152 user_size,
4153 VM_FLAGS_ANYWHERE);
4154 assert(kr == KERN_SUCCESS);
4155 wire_addr = 0;
4156 wire_size = user_size;
4157 kr = mach_vm_remap(wire_map,
4158 &wire_addr,
4159 wire_size,
4160 0,
4161 VM_FLAGS_ANYWHERE,
4162 user_map,
4163 user_addr,
4164 FALSE,
4165 &cur_prot,
4166 &max_prot,
4167 VM_INHERIT_NONE);
4168 assert(kr == KERN_SUCCESS);
4169 for (cur_offset = 0;
4170 cur_offset < wire_size;
4171 cur_offset += PAGE_SIZE) {
4172 kr = vm_map_wire_and_extract(wire_map,
4173 wire_addr + cur_offset,
4174 VM_PROT_DEFAULT,
4175 TRUE,
4176 &wire_ppnum);
4177 assert(kr == KERN_SUCCESS);
4178 user_ppnum = vm_map_get_phys_page(user_map,
4179 user_addr + cur_offset);
4180 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x "
4181 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4182 kr,
4183 user_map, user_addr + cur_offset, user_ppnum,
4184 wire_map, wire_addr + cur_offset, wire_ppnum);
4185 if (kr != KERN_SUCCESS ||
4186 wire_ppnum == 0 ||
4187 wire_ppnum != user_ppnum) {
4188 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4189 }
4190 }
4191 cur_offset -= PAGE_SIZE;
4192 kr = vm_map_wire_and_extract(wire_map,
4193 wire_addr + cur_offset,
4194 VM_PROT_DEFAULT,
4195 TRUE,
4196 &wire_ppnum);
4197 assert(kr == KERN_SUCCESS);
4198 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
4199 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4200 kr,
4201 user_map, user_addr + cur_offset, user_ppnum,
4202 wire_map, wire_addr + cur_offset, wire_ppnum);
4203 if (kr != KERN_SUCCESS ||
4204 wire_ppnum == 0 ||
4205 wire_ppnum != user_ppnum) {
4206 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4207 }
4208
4209 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
4210 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4211
4212
4213 vm_pageout_continue();
4214
4215 /*
4216 * Unreached code!
4217 *
4218 * The vm_pageout_continue() call above never returns, so the code below is never
4219 * executed. We take advantage of this to declare several DTrace VM related probe
4220 * points that our kernel doesn't have an analog for. These are probe points that
4221 * exist in Solaris and are in the DTrace documentation, so people may have written
4222 * scripts that use them. Declaring the probe points here means their scripts will
4223 * compile and execute which we want for portability of the scripts, but since this
4224 * section of code is never reached, the probe points will simply never fire. Yes,
4225 * this is basically a hack. The problem is the DTrace probe points were chosen with
4226 * Solaris-specific VM events in mind, not portability to different VM implementations.
4227 */
4228
4229 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4230 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4231 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4232 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4233 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4234 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4235 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4236 /*NOTREACHED*/
4237 }
4238
4239
4240
4241 #define MAX_COMPRESSOR_THREAD_COUNT 8
4242
4243 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
4244
4245 int vm_compressor_thread_count = 2;
4246
4247 kern_return_t
4248 vm_pageout_internal_start(void)
4249 {
4250 kern_return_t result;
4251 int i;
4252 host_basic_info_data_t hinfo;
4253
4254 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
4255 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4256 #define BSD_HOST 1
4257 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4258
4259 assert(hinfo.max_cpus > 0);
4260
4261 if (vm_compressor_thread_count >= hinfo.max_cpus)
4262 vm_compressor_thread_count = hinfo.max_cpus - 1;
4263 if (vm_compressor_thread_count <= 0)
4264 vm_compressor_thread_count = 1;
4265 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4266 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4267
4268 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4269 } else {
4270 vm_compressor_thread_count = 1;
4271 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4272 }
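	/*
	 * Example of the sizing above: on an 8-core machine running the
	 * compressed pager, the default vm_compressor_thread_count of 2 is
	 * kept (2 < 8) and the internal queue's pgo_maxlaundry becomes
	 * 2 * 4 * VM_PAGE_LAUNDRY_MAX, i.e. 8 times the single-queue limit.
	 */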
4273
4274 for (i = 0; i < vm_compressor_thread_count; i++) {
4275
4276 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
4277 if (result == KERN_SUCCESS)
4278 thread_deallocate(vm_pageout_internal_iothread);
4279 else
4280 break;
4281 }
4282 return result;
4283 }
4284
4285 #if CONFIG_IOSCHED
4286 /*
4287 * To support I/O Expedite for compressed files we mark the upls with special flags.
4288 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4289 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4290 * then issues smaller I/Os for the compressed extents, inflates the data and puts it into the pages
4291 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4292 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4293 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4294 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4295 * unless the real I/O upl is being destroyed).
4296 */
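/*
 * A minimal sketch of how the request/real-I/O pairing described above is
 * driven from the decmpfs side, using the helpers defined later in this
 * file (upl_mark_decmp()/upl_unmark_decmp()).  Not compiled; the function
 * name and the elided driver steps are hypothetical placeholders.
 */
#if 0
static void
decmp_expedite_example(upl_t req_upl)
{
	/* tag the big request UPL; this also records it in the creating
	 * thread so that child UPLs can find it */
	upl_mark_decmp(req_upl);

	/*
	 * ... decmpfs issues its smaller real I/Os here; any UPL created by
	 * this thread with UPL_CREATE_INTERNAL | UPL_CREATE_EXPEDITE_SUP is
	 * linked back to req_upl via upl_set_decmp_info() and tagged
	 * UPL_DECMP_REAL_IO ...
	 */

	/* drop the per-thread link once the compressed read has been staged */
	upl_unmark_decmp(req_upl);
}
#endif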
4297
4298
4299 static void
4300 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4301 {
4302 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4303
4304 upl_lock(src_upl);
4305 if (src_upl->decmp_io_upl) {
4306 /*
4307 * If there is already an alive real I/O UPL, ignore this new UPL.
4308 * This case should rarely happen and even if it does, it just means
4309 * that we might issue a spurious expedite which the driver is expected
4310 * to handle.
4311 */
4312 upl_unlock(src_upl);
4313 return;
4314 }
4315 src_upl->decmp_io_upl = (void *)upl;
4316 src_upl->ref_count++;
4317 upl_unlock(src_upl);
4318
4319 upl->flags |= UPL_DECMP_REAL_IO;
4320 upl->decmp_io_upl = (void *)src_upl;
4321
4322 }
4323 #endif /* CONFIG_IOSCHED */
4324
4325 #if UPL_DEBUG
4326 int upl_debug_enabled = 1;
4327 #else
4328 int upl_debug_enabled = 0;
4329 #endif
4330
4331 static upl_t
4332 upl_create(int type, int flags, upl_size_t size)
4333 {
4334 upl_t upl;
4335 vm_size_t page_field_size = 0;
4336 int upl_flags = 0;
4337 vm_size_t upl_size = sizeof(struct upl);
4338
4339 size = round_page_32(size);
4340
4341 if (type & UPL_CREATE_LITE) {
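		/*
		 * one bit per page, rounded up to whole bytes and then to a
		 * 4-byte boundary: e.g. a 1MB UPL with 4KB pages covers 256
		 * pages -> (256 + 7) >> 3 = 32 bytes, already 4-byte aligned.
		 */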
4342 page_field_size = (atop(size) + 7) >> 3;
4343 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4344
4345 upl_flags |= UPL_LITE;
4346 }
4347 if (type & UPL_CREATE_INTERNAL) {
4348 upl_size += sizeof(struct upl_page_info) * atop(size);
4349
4350 upl_flags |= UPL_INTERNAL;
4351 }
4352 upl = (upl_t)kalloc(upl_size + page_field_size);
4353
4354 if (page_field_size)
4355 bzero((char *)upl + upl_size, page_field_size);
4356
4357 upl->flags = upl_flags | flags;
4358 upl->src_object = NULL;
4359 upl->kaddr = (vm_offset_t)0;
4360 upl->size = 0;
4361 upl->map_object = NULL;
4362 upl->ref_count = 1;
4363 upl->ext_ref_count = 0;
4364 upl->highest_page = 0;
4365 upl_lock_init(upl);
4366 upl->vector_upl = NULL;
4367 #if CONFIG_IOSCHED
4368 if (type & UPL_CREATE_IO_TRACKING) {
4369 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4370 }
4371
4372 upl->upl_reprio_info = 0;
4373 upl->decmp_io_upl = 0;
4374 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4375 /* Only support expedite on internal UPLs */
4376 thread_t curthread = current_thread();
4377 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4378 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4379 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4380 if (curthread->decmp_upl != NULL)
4381 upl_set_decmp_info(upl, curthread->decmp_upl);
4382 }
4383 #endif
4384 #if CONFIG_IOSCHED || UPL_DEBUG
4385 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4386 upl->upl_creator = current_thread();
4387 upl->uplq.next = 0;
4388 upl->uplq.prev = 0;
4389 upl->flags |= UPL_TRACKED_BY_OBJECT;
4390 }
4391 #endif
4392
4393 #if UPL_DEBUG
4394 upl->ubc_alias1 = 0;
4395 upl->ubc_alias2 = 0;
4396
4397 upl->upl_state = 0;
4398 upl->upl_commit_index = 0;
4399 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4400
4401 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4402 #endif /* UPL_DEBUG */
4403
4404 return(upl);
4405 }
4406
4407 static void
4408 upl_destroy(upl_t upl)
4409 {
4410 int page_field_size; /* bit field in word size buf */
4411 int size;
4412
4413 if (upl->ext_ref_count) {
4414 panic("upl(%p) ext_ref_count", upl);
4415 }
4416
4417 #if CONFIG_IOSCHED
4418 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4419 upl_t src_upl;
4420 src_upl = upl->decmp_io_upl;
4421 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4422 upl_lock(src_upl);
4423 src_upl->decmp_io_upl = NULL;
4424 upl_unlock(src_upl);
4425 upl_deallocate(src_upl);
4426 }
4427 #endif /* CONFIG_IOSCHED */
4428
4429 #if CONFIG_IOSCHED || UPL_DEBUG
4430 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4431 vm_object_t object;
4432
4433 if (upl->flags & UPL_SHADOWED) {
4434 object = upl->map_object->shadow;
4435 } else {
4436 object = upl->map_object;
4437 }
4438
4439 vm_object_lock(object);
4440 queue_remove(&object->uplq, upl, upl_t, uplq);
4441 vm_object_activity_end(object);
4442 vm_object_collapse(object, 0, TRUE);
4443 vm_object_unlock(object);
4444 }
4445 #endif
4446 /*
4447 * drop a reference on the map_object whether or
4448 * not a pageout object is inserted
4449 */
4450 if (upl->flags & UPL_SHADOWED)
4451 vm_object_deallocate(upl->map_object);
4452
4453 if (upl->flags & UPL_DEVICE_MEMORY)
4454 size = PAGE_SIZE;
4455 else
4456 size = upl->size;
4457 page_field_size = 0;
4458
4459 if (upl->flags & UPL_LITE) {
4460 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4461 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4462 }
4463 upl_lock_destroy(upl);
4464 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4465
4466 #if CONFIG_IOSCHED
4467 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4468 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4469 #endif
4470
4471 if (upl->flags & UPL_INTERNAL) {
4472 kfree(upl,
4473 sizeof(struct upl) +
4474 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4475 + page_field_size);
4476 } else {
4477 kfree(upl, sizeof(struct upl) + page_field_size);
4478 }
4479 }
4480
4481 void
4482 upl_deallocate(upl_t upl)
4483 {
4484 upl_lock(upl);
4485 if (--upl->ref_count == 0) {
4486 if(vector_upl_is_valid(upl))
4487 vector_upl_deallocate(upl);
4488 upl_unlock(upl);
4489 upl_destroy(upl);
4490 }
4491 else
4492 upl_unlock(upl);
4493 }
4494
4495 #if CONFIG_IOSCHED
4496 void
4497 upl_mark_decmp(upl_t upl)
4498 {
4499 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4500 upl->flags |= UPL_DECMP_REQ;
4501 upl->upl_creator->decmp_upl = (void *)upl;
4502 }
4503 }
4504
4505 void
4506 upl_unmark_decmp(upl_t upl)
4507 {
4508 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4509 upl->upl_creator->decmp_upl = NULL;
4510 }
4511 }
4512
4513 #endif /* CONFIG_IOSCHED */
4514
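/*
 * A pageout queue is considered to be backing up once its in-flight laundry
 * count reaches 80% of pgo_maxlaundry; must_throttle_writes() additionally
 * requires that external (file-backed) pageable pages make up more than 60%
 * of the available non-compressed memory before asking writers to throttle.
 */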
4515 #define VM_PAGE_Q_BACKING_UP(q) \
4516 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4517
4518 boolean_t must_throttle_writes(void);
4519
4520 boolean_t
4521 must_throttle_writes()
4522 {
4523 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4524 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4525 return (TRUE);
4526
4527 return (FALSE);
4528 }
4529
4530
4531 #if DEVELOPMENT || DEBUG
4532 /*
4533 * Statistics about UPL enforcement of copy-on-write obligations.
4534 */
4535 unsigned long upl_cow = 0;
4536 unsigned long upl_cow_again = 0;
4537 unsigned long upl_cow_pages = 0;
4538 unsigned long upl_cow_again_pages = 0;
4539
4540 unsigned long iopl_cow = 0;
4541 unsigned long iopl_cow_pages = 0;
4542 #endif
4543
4544 /*
4545 * Routine: vm_object_upl_request
4546 * Purpose:
4547 * Cause the population of a portion of a vm_object.
4548 * Depending on the nature of the request, the pages
4549 * returned may contain valid data or be uninitialized.
4550 * A page list structure, listing the physical pages,
4551 * will be returned upon request.
4552 * This function is called by the file system or any other
4553 * supplier of backing store to a pager.
4554 * IMPORTANT NOTE: The caller must still respect the relationship
4555 * between the vm_object and its backing memory object. The
4556 * caller MUST NOT substitute changes in the backing file
4557 * without first doing a memory_object_lock_request on the
4558 * target range unless it is known that the pages are not
4559 * shared with another entity at the pager level.
4560 * Copy_in_to:
4561 * if a page list structure is present,
4562 * return the mapped physical pages; where a
4563 * page is not present, return a non-initialized
4564 * one. If the no_sync bit is turned on, don't
4565 * call the pager unlock to synchronize with other
4566 * possible copies of the page. Leave pages busy
4567 * in the original object, if a page list structure
4568 * was specified. When a commit of the page list
4569 * pages is done, the dirty bit will be set for each one.
4570 * Copy_out_from:
4571 * If a page list structure is present, return
4572 * all mapped pages. Where a page does not exist
4573 * map a zero filled one. Leave pages busy in
4574 * the original object. If a page list structure
4575 * is not specified, this call is a no-op.
4576 *
4577 * Note: access of default pager objects has a rather interesting
4578 * twist. The caller of this routine, presumably the file system
4579 * page cache handling code, will never actually make a request
4580 * against a default pager backed object. Only the default
4581 * pager will make requests on backing store related vm_objects.
4582 * In this way the default pager can maintain the relationship
4583 * between backing store files (abstract memory objects) and
4584 * the vm_objects (cache objects) they support.
4585 *
4586 */
4587
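/*
 * A minimal caller-side sketch of the contract described above: gather the
 * dirty pages of a range for pageout and pick up the internally allocated
 * page list.  Not compiled; upl_request_example() is a hypothetical wrapper
 * and the commit/abort step is elided.
 */
#if 0
static kern_return_t
upl_request_example(vm_object_t object, vm_object_offset_t offset, upl_size_t size)
{
	upl_t		upl = NULL;
	upl_page_info_t	*pl;
	unsigned int	count = 0;
	kern_return_t	kr;

	kr = vm_object_upl_request(object, offset, size,
				   &upl, NULL, &count,
				   UPL_SET_INTERNAL | UPL_SET_LITE |
				   UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY);
	if (kr != KERN_SUCCESS)
		return kr;

	/* with UPL_SET_INTERNAL the page list lives inside the UPL itself */
	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

	/*
	 * ... hand "upl"/"pl" to the pager for I/O; slots whose phys_addr is
	 * zero were skipped.  The pages remain busy until the I/O path
	 * commits or aborts the UPL, after which the UPL reference is
	 * dropped with upl_deallocate() ...
	 */

	return KERN_SUCCESS;
}
#endif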
4588 __private_extern__ kern_return_t
4589 vm_object_upl_request(
4590 vm_object_t object,
4591 vm_object_offset_t offset,
4592 upl_size_t size,
4593 upl_t *upl_ptr,
4594 upl_page_info_array_t user_page_list,
4595 unsigned int *page_list_count,
4596 int cntrl_flags)
4597 {
4598 vm_page_t dst_page = VM_PAGE_NULL;
4599 vm_object_offset_t dst_offset;
4600 upl_size_t xfer_size;
4601 unsigned int size_in_pages;
4602 boolean_t dirty;
4603 boolean_t hw_dirty;
4604 upl_t upl = NULL;
4605 unsigned int entry;
4606 #if MACH_CLUSTER_STATS
4607 boolean_t encountered_lrp = FALSE;
4608 #endif
4609 vm_page_t alias_page = NULL;
4610 int refmod_state = 0;
4611 wpl_array_t lite_list = NULL;
4612 vm_object_t last_copy_object;
4613 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4614 struct vm_page_delayed_work *dwp;
4615 int dw_count;
4616 int dw_limit;
4617 int io_tracking_flag = 0;
4618
4619 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4620 /*
4621 * For forward compatibility's sake,
4622 * reject any unknown flag.
4623 */
4624 return KERN_INVALID_VALUE;
4625 }
4626 if ( (!object->internal) && (object->paging_offset != 0) )
4627 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4628 if (object->phys_contiguous)
4629 panic("vm_object_upl_request: contiguous object specified\n");
4630
4631
4632 if (size > MAX_UPL_SIZE_BYTES)
4633 size = MAX_UPL_SIZE_BYTES;
4634
4635 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4636 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4637
4638 #if CONFIG_IOSCHED || UPL_DEBUG
4639 if (object->io_tracking || upl_debug_enabled)
4640 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
4641 #endif
4642 #if CONFIG_IOSCHED
4643 if (object->io_tracking)
4644 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
4645 #endif
4646
4647 if (cntrl_flags & UPL_SET_INTERNAL) {
4648 if (cntrl_flags & UPL_SET_LITE) {
4649
4650 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4651
4652 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4653 lite_list = (wpl_array_t)
4654 (((uintptr_t)user_page_list) +
4655 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4656 if (size == 0) {
4657 user_page_list = NULL;
4658 lite_list = NULL;
4659 }
4660 } else {
4661 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
4662
4663 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4664 if (size == 0) {
4665 user_page_list = NULL;
4666 }
4667 }
4668 } else {
4669 if (cntrl_flags & UPL_SET_LITE) {
4670
4671 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4672
4673 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4674 if (size == 0) {
4675 lite_list = NULL;
4676 }
4677 } else {
4678 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
4679 }
4680 }
4681 *upl_ptr = upl;
4682
4683 if (user_page_list)
4684 user_page_list[0].device = FALSE;
4685
4686 if (cntrl_flags & UPL_SET_LITE) {
4687 upl->map_object = object;
4688 } else {
4689 upl->map_object = vm_object_allocate(size);
4690 /*
4691 * No need to lock the new object: nobody else knows
4692 * about it yet, so it's all ours so far.
4693 */
4694 upl->map_object->shadow = object;
4695 upl->map_object->pageout = TRUE;
4696 upl->map_object->can_persist = FALSE;
4697 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4698 upl->map_object->vo_shadow_offset = offset;
4699 upl->map_object->wimg_bits = object->wimg_bits;
4700
4701 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4702
4703 upl->flags |= UPL_SHADOWED;
4704 }
4705 /*
4706 * ENCRYPTED SWAP:
4707 * Just mark the UPL as "encrypted" here.
4708 * We'll actually encrypt the pages later,
4709 * in upl_encrypt(), when the caller has
4710 * selected which pages need to go to swap.
4711 */
4712 if (cntrl_flags & UPL_ENCRYPT)
4713 upl->flags |= UPL_ENCRYPTED;
4714
4715 if (cntrl_flags & UPL_FOR_PAGEOUT)
4716 upl->flags |= UPL_PAGEOUT;
4717
4718 vm_object_lock(object);
4719 vm_object_activity_begin(object);
4720
4721 /*
4722 * we can lock in the paging_offset once paging_in_progress is set
4723 */
4724 upl->size = size;
4725 upl->offset = offset + object->paging_offset;
4726
4727 #if CONFIG_IOSCHED || UPL_DEBUG
4728 if (object->io_tracking || upl_debug_enabled) {
4729 vm_object_activity_begin(object);
4730 queue_enter(&object->uplq, upl, upl_t, uplq);
4731 }
4732 #endif
4733 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
4734 /*
4735 * Honor copy-on-write obligations
4736 *
4737 * The caller is gathering these pages and
4738 * might modify their contents. We need to
4739 * make sure that the copy object has its own
4740 * private copies of these pages before we let
4741 * the caller modify them.
4742 */
4743 vm_object_update(object,
4744 offset,
4745 size,
4746 NULL,
4747 NULL,
4748 FALSE, /* should_return */
4749 MEMORY_OBJECT_COPY_SYNC,
4750 VM_PROT_NO_CHANGE);
4751 #if DEVELOPMENT || DEBUG
4752 upl_cow++;
4753 upl_cow_pages += size >> PAGE_SHIFT;
4754 #endif
4755 }
4756 /*
4757 * remember which copy object we synchronized with
4758 */
4759 last_copy_object = object->copy;
4760 entry = 0;
4761
4762 xfer_size = size;
4763 dst_offset = offset;
4764 size_in_pages = size / PAGE_SIZE;
4765
4766 dwp = &dw_array[0];
4767 dw_count = 0;
4768 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
4769
4770 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
4771 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
4772 object->scan_collisions = 0;
4773
4774 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
4775 boolean_t isSSD = FALSE;
4776
4777 vnode_pager_get_isSSD(object->pager, &isSSD);
4778 vm_object_unlock(object);
4779
4780 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
4781
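		/*
		 * throttle the writer in proportion to the size of the request:
		 * delay() is in microseconds, so this stalls roughly 1ms per
		 * page when the backing store is an SSD and 5ms per page
		 * otherwise.
		 */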
4782 if (isSSD == TRUE)
4783 delay(1000 * size_in_pages);
4784 else
4785 delay(5000 * size_in_pages);
4786 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
4787
4788 vm_object_lock(object);
4789 }
4790
4791 while (xfer_size) {
4792
4793 dwp->dw_mask = 0;
4794
4795 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4796 vm_object_unlock(object);
4797 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4798 vm_object_lock(object);
4799 }
4800 if (cntrl_flags & UPL_COPYOUT_FROM) {
4801 upl->flags |= UPL_PAGE_SYNC_DONE;
4802
4803 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
4804 dst_page->fictitious ||
4805 dst_page->absent ||
4806 dst_page->error ||
4807 dst_page->cleaning ||
4808 (VM_PAGE_WIRED(dst_page))) {
4809
4810 if (user_page_list)
4811 user_page_list[entry].phys_addr = 0;
4812
4813 goto try_next_page;
4814 }
4815 /*
4816 * grab this up front...
4817 * a high percentage of the time we're going to
4818 * need the hardware modification state a bit later
4819 * anyway... so we can eliminate an extra call into
4820 * the pmap layer by grabbing it here and recording it
4821 */
4822 if (dst_page->pmapped)
4823 refmod_state = pmap_get_refmod(dst_page->phys_page);
4824 else
4825 refmod_state = 0;
4826
4827 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) {
4828 /*
4829 * page is on inactive list and referenced...
4830 * reactivate it now... this gets it out of the
4831 * way of vm_pageout_scan which would have to
4832 * reactivate it upon tripping over it
4833 */
4834 dwp->dw_mask |= DW_vm_page_activate;
4835 }
4836 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
4837 /*
4838 * we're only asking for DIRTY pages to be returned
4839 */
4840 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
4841 /*
4842 * if we were the page stolen by vm_pageout_scan to be
4843 * cleaned (as opposed to a buddy being clustered in
4844 * or this request is not being driven by a PAGEOUT cluster
4845 * then we only need to check for the page being dirty or
4846 * precious to decide whether to return it
4847 */
4848 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
4849 goto check_busy;
4850 goto dont_return;
4851 }
4852 /*
4853 * this is a request for a PAGEOUT cluster and this page
4854 * is merely along for the ride as a 'buddy'... not only
4855 * does it have to be dirty to be returned, but it also
4856 * can't have been referenced recently...
4857 */
4858 if ( (hibernate_cleaning_in_progress == TRUE ||
4859 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) &&
4860 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
4861 goto check_busy;
4862 }
4863 dont_return:
4864 /*
4865 * if we reach here, we're not to return
4866 * the page... go on to the next one
4867 */
4868 if (dst_page->laundry == TRUE) {
4869 /*
4870 * if we get here, the page is not 'cleaning' (filtered out above).
4871 * since it has been referenced, remove it from the laundry
4872 * so we don't pay the cost of an I/O to clean a page
4873 * we're just going to take back
4874 */
4875 vm_page_lockspin_queues();
4876
4877 vm_pageout_steal_laundry(dst_page, TRUE);
4878 vm_page_activate(dst_page);
4879
4880 vm_page_unlock_queues();
4881 }
4882 if (user_page_list)
4883 user_page_list[entry].phys_addr = 0;
4884
4885 goto try_next_page;
4886 }
4887 check_busy:
4888 if (dst_page->busy) {
4889 if (cntrl_flags & UPL_NOBLOCK) {
4890 if (user_page_list)
4891 user_page_list[entry].phys_addr = 0;
4892
4893 goto try_next_page;
4894 }
4895 /*
4896 * someone else is playing with the
4897 * page. We will have to wait.
4898 */
4899 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
4900
4901 continue;
4902 }
4903 /*
4904 * ENCRYPTED SWAP:
4905 * The caller is gathering this page and might
4906 * access its contents later on. Decrypt the
4907 * page before adding it to the UPL, so that
4908 * the caller never sees encrypted data.
4909 */
4910 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
4911 int was_busy;
4912
4913 /*
4914 * save the current state of busy
4915 * mark page as busy while decrypt
4916 * is in progress since it will drop
4917 * the object lock...
4918 */
4919 was_busy = dst_page->busy;
4920 dst_page->busy = TRUE;
4921
4922 vm_page_decrypt(dst_page, 0);
4923 vm_page_decrypt_for_upl_counter++;
4924 /*
4925 * restore to original busy state
4926 */
4927 dst_page->busy = was_busy;
4928 }
4929 if (dst_page->pageout_queue == TRUE) {
4930
4931 vm_page_lockspin_queues();
4932
4933 if (dst_page->pageout_queue == TRUE) {
4934 /*
4935 * we've buddied up a page for a clustered pageout
4936 * that has already been moved to the pageout
4937 * queue by pageout_scan... we need to remove
4938 * it from the queue and drop the laundry count
4939 * on that queue
4940 */
4941 vm_pageout_throttle_up(dst_page);
4942 }
4943 vm_page_unlock_queues();
4944 }
4945 #if MACH_CLUSTER_STATS
4946 /*
4947 * pageout statistics gathering. count
4948 * all the pages we will page out that
4949 * were not counted in the initial
4950 * vm_pageout_scan work
4951 */
4952 if (dst_page->pageout)
4953 encountered_lrp = TRUE;
4954 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) {
4955 if (encountered_lrp)
4956 CLUSTER_STAT(pages_at_higher_offsets++;)
4957 else
4958 CLUSTER_STAT(pages_at_lower_offsets++;)
4959 }
4960 #endif
4961 hw_dirty = refmod_state & VM_MEM_MODIFIED;
4962 dirty = hw_dirty ? TRUE : dst_page->dirty;
4963
4964 if (dst_page->phys_page > upl->highest_page)
4965 upl->highest_page = dst_page->phys_page;
4966
4967 if (cntrl_flags & UPL_SET_LITE) {
4968 unsigned int pg_num;
4969
4970 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
4971 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
4972 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4973
4974 if (hw_dirty)
4975 pmap_clear_modify(dst_page->phys_page);
4976
4977 /*
4978 * Mark original page as cleaning
4979 * in place.
4980 */
4981 dst_page->cleaning = TRUE;
4982 dst_page->precious = FALSE;
4983 } else {
4984 /*
4985 * use pageclean setup, it is more
4986 * convenient even for the pageout
4987 * cases here
4988 */
4989 vm_object_lock(upl->map_object);
4990 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
4991 vm_object_unlock(upl->map_object);
4992
4993 alias_page->absent = FALSE;
4994 alias_page = NULL;
4995 }
4996 #if MACH_PAGEMAP
4997 /*
4998 * Record that this page has been
4999 * written out
5000 */
5001 vm_external_state_set(object->existence_map, dst_page->offset);
5002 #endif /*MACH_PAGEMAP*/
5003 if (dirty) {
5004 SET_PAGE_DIRTY(dst_page, FALSE);
5005 } else {
5006 dst_page->dirty = FALSE;
5007 }
5008
5009 if (!dirty)
5010 dst_page->precious = TRUE;
5011
5012 if ( (cntrl_flags & UPL_ENCRYPT) ) {
5013 /*
5014 * ENCRYPTED SWAP:
5015 * We want to deny access to the target page
5016 * because its contents are about to be
5017 * encrypted and the user would be very
5018 * confused to see encrypted data instead
5019 * of their data.
5020 * We also set "encrypted_cleaning" to allow
5021 * vm_pageout_scan() to demote that page
5022 * from "adjacent/clean-in-place" to
5023 * "target/clean-and-free" if it bumps into
5024 * this page during its scanning while we're
5025 * still processing this cluster.
5026 */
5027 dst_page->busy = TRUE;
5028 dst_page->encrypted_cleaning = TRUE;
5029 }
5030 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5031 if ( !VM_PAGE_WIRED(dst_page))
5032 dst_page->pageout = TRUE;
5033 }
5034 } else {
5035 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5036 /*
5037 * Honor copy-on-write obligations
5038 *
5039 * The copy object has changed since we
5040 * last synchronized for copy-on-write.
5041 * Another copy object might have been
5042 * inserted while we released the object's
5043 * lock. Since someone could have seen the
5044 * original contents of the remaining pages
5045 * through that new object, we have to
5046 * synchronize with it again for the remaining
5047 * pages only. The previous pages are "busy"
5048 * so they can not be seen through the new
5049 * mapping. The new mapping will see our
5050 * upcoming changes for those previous pages,
5051 * but that's OK since they couldn't see what
5052 * was there before. It's just a race anyway
5053 * and there's no guarantee of consistency or
5054 * atomicity. We just don't want new mappings
5055 * to see both the *before* and *after* pages.
5056 */
5057 if (object->copy != VM_OBJECT_NULL) {
5058 vm_object_update(
5059 object,
5060 dst_offset,/* current offset */
5061 xfer_size, /* remaining size */
5062 NULL,
5063 NULL,
5064 FALSE, /* should_return */
5065 MEMORY_OBJECT_COPY_SYNC,
5066 VM_PROT_NO_CHANGE);
5067
5068 #if DEVELOPMENT || DEBUG
5069 upl_cow_again++;
5070 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5071 #endif
5072 }
5073 /*
5074 * remember the copy object we synced with
5075 */
5076 last_copy_object = object->copy;
5077 }
5078 dst_page = vm_page_lookup(object, dst_offset);
5079
5080 if (dst_page != VM_PAGE_NULL) {
5081
5082 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5083 /*
5084 * skip over pages already present in the cache
5085 */
5086 if (user_page_list)
5087 user_page_list[entry].phys_addr = 0;
5088
5089 goto try_next_page;
5090 }
5091 if (dst_page->fictitious) {
5092 panic("need corner case for fictitious page");
5093 }
5094
5095 if (dst_page->busy || dst_page->cleaning) {
5096 /*
5097 * someone else is playing with the
5098 * page. We will have to wait.
5099 */
5100 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5101
5102 continue;
5103 }
5104 if (dst_page->laundry) {
5105 dst_page->pageout = FALSE;
5106
5107 vm_pageout_steal_laundry(dst_page, FALSE);
5108 }
5109 } else {
5110 if (object->private) {
5111 /*
5112 * This is a nasty wrinkle for users
5113 * of upl who encounter device or
5114 * private memory; however, it is
5115 * unavoidable: only a fault can
5116 * resolve the actual backing
5117 * physical page by asking the
5118 * backing device.
5119 */
5120 if (user_page_list)
5121 user_page_list[entry].phys_addr = 0;
5122
5123 goto try_next_page;
5124 }
5125 if (object->scan_collisions) {
5126 /*
5127 * the pageout_scan thread is trying to steal
5128 * pages from this object, but has run into our
5129 * lock... grab 2 pages from the head of the object...
5130 * the first is freed on behalf of pageout_scan, the
5131 * 2nd is for our own use... we use vm_object_page_grab
5132 * in both cases to avoid taking pages from the free
5133 * list since we are under memory pressure and our
5134 * lock on this object is getting in the way of
5135 * relieving it
5136 */
5137 dst_page = vm_object_page_grab(object);
5138
5139 if (dst_page != VM_PAGE_NULL)
5140 vm_page_release(dst_page);
5141
5142 dst_page = vm_object_page_grab(object);
5143 }
5144 if (dst_page == VM_PAGE_NULL) {
5145 /*
5146 * need to allocate a page
5147 */
5148 dst_page = vm_page_grab();
5149 }
5150 if (dst_page == VM_PAGE_NULL) {
5151 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5152 /*
5153 * we don't want to stall waiting for pages to come onto the free list
5154 * while we're already holding absent pages in this UPL
5155 * the caller will deal with the empty slots
5156 */
5157 if (user_page_list)
5158 user_page_list[entry].phys_addr = 0;
5159
5160 goto try_next_page;
5161 }
5162 /*
5163 * no pages available... wait
5164 * then try again for the same
5165 * offset...
5166 */
5167 vm_object_unlock(object);
5168
5169 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5170
5171 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5172
5173 VM_PAGE_WAIT();
5174 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5175
5176 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5177
5178 vm_object_lock(object);
5179
5180 continue;
5181 }
5182 vm_page_insert(dst_page, object, dst_offset);
5183
5184 dst_page->absent = TRUE;
5185 dst_page->busy = FALSE;
5186
5187 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5188 /*
5189 * if UPL_RET_ONLY_ABSENT was specified,
5190 * then we're definitely setting up a
5191 * upl for a clustered read/pagein
5192 * operation... mark the pages as clustered
5193 * so upl_commit_range can put them on the
5194 * speculative list
5195 */
5196 dst_page->clustered = TRUE;
5197
5198 if ( !(cntrl_flags & UPL_FILE_IO))
5199 VM_STAT_INCR(pageins);
5200 }
5201 }
5202 /*
5203 * ENCRYPTED SWAP:
5204 */
5205 if (cntrl_flags & UPL_ENCRYPT) {
5206 /*
5207 * The page is going to be encrypted when we
5208 * get it from the pager, so mark it so.
5209 */
5210 dst_page->encrypted = TRUE;
5211 } else {
5212 /*
5213 * Otherwise, the page will not contain
5214 * encrypted data.
5215 */
5216 dst_page->encrypted = FALSE;
5217 }
5218 dst_page->overwriting = TRUE;
5219
5220 if (dst_page->pmapped) {
5221 if ( !(cntrl_flags & UPL_FILE_IO))
5222 /*
5223 * eliminate all mappings from the
5224 * original object and its progeny
5225 */
5226 refmod_state = pmap_disconnect(dst_page->phys_page);
5227 else
5228 refmod_state = pmap_get_refmod(dst_page->phys_page);
5229 } else
5230 refmod_state = 0;
5231
5232 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5233 dirty = hw_dirty ? TRUE : dst_page->dirty;
5234
5235 if (cntrl_flags & UPL_SET_LITE) {
5236 unsigned int pg_num;
5237
5238 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5239 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5240 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5241
5242 if (hw_dirty)
5243 pmap_clear_modify(dst_page->phys_page);
5244
5245 /*
5246 * Mark original page as cleaning
5247 * in place.
5248 */
5249 dst_page->cleaning = TRUE;
5250 dst_page->precious = FALSE;
5251 } else {
5252 /*
5253 * use pageclean setup, it is more
5254 * convenient even for the pageout
5255 * cases here
5256 */
5257 vm_object_lock(upl->map_object);
5258 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5259 vm_object_unlock(upl->map_object);
5260
5261 alias_page->absent = FALSE;
5262 alias_page = NULL;
5263 }
5264
5265 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5266 upl->flags &= ~UPL_CLEAR_DIRTY;
5267 upl->flags |= UPL_SET_DIRTY;
5268 dirty = TRUE;
5270 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5271 /*
5272 * clean in place for read implies
5273 * that a write will be done on all
5274 * the pages that are dirty before
5275 * a upl commit is done. The caller
5276 * is obligated to preserve the
5277 * contents of all pages marked dirty
5278 */
5279 upl->flags |= UPL_CLEAR_DIRTY;
5280 }
5281 dst_page->dirty = dirty;
5282
5283 if (!dirty)
5284 dst_page->precious = TRUE;
5285
5286 if ( !VM_PAGE_WIRED(dst_page)) {
5287 /*
5288 * deny access to the target page while
5289 * it is being worked on
5290 */
5291 dst_page->busy = TRUE;
5292 } else
5293 dwp->dw_mask |= DW_vm_page_wire;
5294
5295 /*
5296 * We might be about to satisfy a fault which has been
5297 * requested. So no need for the "restart" bit.
5298 */
5299 dst_page->restart = FALSE;
5300 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5301 /*
5302 * expect the page to be used
5303 */
5304 dwp->dw_mask |= DW_set_reference;
5305 }
5306 if (cntrl_flags & UPL_PRECIOUS) {
5307 if (dst_page->object->internal) {
5308 SET_PAGE_DIRTY(dst_page, FALSE);
5309 dst_page->precious = FALSE;
5310 } else {
5311 dst_page->precious = TRUE;
5312 }
5313 } else {
5314 dst_page->precious = FALSE;
5315 }
5316 }
5317 if (dst_page->busy)
5318 upl->flags |= UPL_HAS_BUSY;
5319
5320 if (dst_page->phys_page > upl->highest_page)
5321 upl->highest_page = dst_page->phys_page;
5322 if (user_page_list) {
5323 user_page_list[entry].phys_addr = dst_page->phys_page;
5324 user_page_list[entry].pageout = dst_page->pageout;
5325 user_page_list[entry].absent = dst_page->absent;
5326 user_page_list[entry].dirty = dst_page->dirty;
5327 user_page_list[entry].precious = dst_page->precious;
5328 user_page_list[entry].device = FALSE;
5329 user_page_list[entry].needed = FALSE;
5330 if (dst_page->clustered == TRUE)
5331 user_page_list[entry].speculative = dst_page->speculative;
5332 else
5333 user_page_list[entry].speculative = FALSE;
5334 user_page_list[entry].cs_validated = dst_page->cs_validated;
5335 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5336 }
5337 /*
5338 * if UPL_RET_ONLY_ABSENT is set, then
5339 * we are working with a fresh page and we've
5340 * just set the clustered flag on it to
5341 * indicate that it was dragged in as part of a
5342 * speculative cluster... so leave it alone
5343 */
5344 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5345 /*
5346 * someone is explicitly grabbing this page...
5347 * update clustered and speculative state
5348 *
5349 */
5350 if (dst_page->clustered)
5351 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5352 }
5353 try_next_page:
5354 if (dwp->dw_mask) {
5355 if (dwp->dw_mask & DW_vm_page_activate)
5356 VM_STAT_INCR(reactivations);
5357
5358 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5359
5360 if (dw_count >= dw_limit) {
5361 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
5362
5363 dwp = &dw_array[0];
5364 dw_count = 0;
5365 }
5366 }
5367 entry++;
5368 dst_offset += PAGE_SIZE_64;
5369 xfer_size -= PAGE_SIZE;
5370 }
5371 if (dw_count)
5372 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
5373
5374 if (alias_page != NULL) {
5375 VM_PAGE_FREE(alias_page);
5376 }
5377
5378 if (page_list_count != NULL) {
5379 if (upl->flags & UPL_INTERNAL)
5380 *page_list_count = 0;
5381 else if (*page_list_count > entry)
5382 *page_list_count = entry;
5383 }
5384 #if UPL_DEBUG
5385 upl->upl_state = 1;
5386 #endif
5387 vm_object_unlock(object);
5388
5389 return KERN_SUCCESS;
5390 }
5391
5392 /* JMM - Backward compatibility for now */
5393 kern_return_t
5394 vm_fault_list_request( /* forward */
5395 memory_object_control_t control,
5396 vm_object_offset_t offset,
5397 upl_size_t size,
5398 upl_t *upl_ptr,
5399 upl_page_info_t **user_page_list_ptr,
5400 unsigned int page_list_count,
5401 int cntrl_flags);
5402 kern_return_t
5403 vm_fault_list_request(
5404 memory_object_control_t control,
5405 vm_object_offset_t offset,
5406 upl_size_t size,
5407 upl_t *upl_ptr,
5408 upl_page_info_t **user_page_list_ptr,
5409 unsigned int page_list_count,
5410 int cntrl_flags)
5411 {
5412 unsigned int local_list_count;
5413 upl_page_info_t *user_page_list;
5414 kern_return_t kr;
5415
5416 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)
5417 return KERN_INVALID_ARGUMENT;
5418
5419 if (user_page_list_ptr != NULL) {
5420 local_list_count = page_list_count;
5421 user_page_list = *user_page_list_ptr;
5422 } else {
5423 local_list_count = 0;
5424 user_page_list = NULL;
5425 }
5426 kr = memory_object_upl_request(control,
5427 offset,
5428 size,
5429 upl_ptr,
5430 user_page_list,
5431 &local_list_count,
5432 cntrl_flags);
5433
5434 if(kr != KERN_SUCCESS)
5435 return kr;
5436
5437 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
5438 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
5439 }
5440
5441 return KERN_SUCCESS;
5442 }
5443
5444
5445
5446 /*
5447 * Routine: vm_object_super_upl_request
5448 * Purpose:
5449 * Cause the population of a portion of a vm_object
5450 * in much the same way as memory_object_upl_request.
5451 * Depending on the nature of the request, the pages
5452 * returned may contain valid data or be uninitialized.
5453 * However, the region may be expanded up to the super
5454 * cluster size provided.
5455 */
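/*
 * Worked example of the expansion below, assuming a 64KB (0x10000) super
 * cluster: a request for offset 0x13000, size 0x2000 is rounded down to
 * base_offset 0x10000 and grows to a 64KB cluster; a request for offset
 * 0x1E000, size 0x4000 straddles the cluster boundary, so super_size is
 * doubled to 128KB (subject to the object-size clamp).
 */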
5456
5457 __private_extern__ kern_return_t
5458 vm_object_super_upl_request(
5459 vm_object_t object,
5460 vm_object_offset_t offset,
5461 upl_size_t size,
5462 upl_size_t super_cluster,
5463 upl_t *upl,
5464 upl_page_info_t *user_page_list,
5465 unsigned int *page_list_count,
5466 int cntrl_flags)
5467 {
5468 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5469 return KERN_FAILURE;
5470
5471 assert(object->paging_in_progress);
5472 offset = offset - object->paging_offset;
5473
5474 if (super_cluster > size) {
5475
5476 vm_object_offset_t base_offset;
5477 upl_size_t super_size;
5478 vm_object_size_t super_size_64;
5479
5480 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5481 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5482 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5483 super_size = (upl_size_t) super_size_64;
5484 assert(super_size == super_size_64);
5485
5486 if (offset > (base_offset + super_size)) {
5487 panic("vm_object_super_upl_request: Missed target pageout"
5488 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5489 offset, base_offset, super_size, super_cluster,
5490 size, object->paging_offset);
5491 }
5492 /*
5493 * apparently there is a case where the vm requests a
5494 * page to be written out whose offset is beyond the
5495 * object size
5496 */
5497 if ((offset + size) > (base_offset + super_size)) {
5498 super_size_64 = (offset + size) - base_offset;
5499 super_size = (upl_size_t) super_size_64;
5500 assert(super_size == super_size_64);
5501 }
5502
5503 offset = base_offset;
5504 size = super_size;
5505 }
5506 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
5507 }
5508
5509
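/*
 * Routine:	vm_map_create_upl
 * Purpose:
 *	Build a UPL for a range of a VM map: look up the map entry
 *	(recursing into submaps), force copy-on-write where the caller
 *	intends to write or true sharing requires it, optionally sync
 *	the shadow/copy objects, and finally hand the backing object to
 *	vm_object_iopl_request().
 */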
5510 kern_return_t
5511 vm_map_create_upl(
5512 vm_map_t map,
5513 vm_map_address_t offset,
5514 upl_size_t *upl_size,
5515 upl_t *upl,
5516 upl_page_info_array_t page_list,
5517 unsigned int *count,
5518 int *flags)
5519 {
5520 vm_map_entry_t entry;
5521 int caller_flags;
5522 int force_data_sync;
5523 int sync_cow_data;
5524 vm_object_t local_object;
5525 vm_map_offset_t local_offset;
5526 vm_map_offset_t local_start;
5527 kern_return_t ret;
5528
5529 caller_flags = *flags;
5530
5531 if (caller_flags & ~UPL_VALID_FLAGS) {
5532 /*
5533 * For forward compatibility's sake,
5534 * reject any unknown flag.
5535 */
5536 return KERN_INVALID_VALUE;
5537 }
5538 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5539 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5540
5541 if (upl == NULL)
5542 return KERN_INVALID_ARGUMENT;
5543
5544 REDISCOVER_ENTRY:
5545 vm_map_lock_read(map);
5546
5547 if (vm_map_lookup_entry(map, offset, &entry)) {
5548
5549 if ((entry->vme_end - offset) < *upl_size) {
5550 *upl_size = (upl_size_t) (entry->vme_end - offset);
5551 assert(*upl_size == entry->vme_end - offset);
5552 }
5553
5554 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5555 *flags = 0;
5556
5557 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) {
5558 if (entry->object.vm_object->private)
5559 *flags = UPL_DEV_MEMORY;
5560
5561 if (entry->object.vm_object->phys_contiguous)
5562 *flags |= UPL_PHYS_CONTIG;
5563 }
5564 vm_map_unlock_read(map);
5565
5566 return KERN_SUCCESS;
5567 }
5568
5569 if (entry->is_sub_map) {
5570 vm_map_t submap;
5571
5572 submap = entry->object.sub_map;
5573 local_start = entry->vme_start;
5574 local_offset = entry->offset;
5575
5576 vm_map_reference(submap);
5577 vm_map_unlock_read(map);
5578
5579 ret = vm_map_create_upl(submap,
5580 local_offset + (offset - local_start),
5581 upl_size, upl, page_list, count, flags);
5582 vm_map_deallocate(submap);
5583
5584 return ret;
5585 }
5586
5587 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
5588 if (*upl_size > MAX_UPL_SIZE_BYTES)
5589 *upl_size = MAX_UPL_SIZE_BYTES;
5590 }
5591 /*
5592 * Create an object if necessary.
5593 */
5594 if (entry->object.vm_object == VM_OBJECT_NULL) {
5595
5596 if (vm_map_lock_read_to_write(map))
5597 goto REDISCOVER_ENTRY;
5598
5599 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
5600 entry->offset = 0;
5601
5602 vm_map_lock_write_to_read(map);
5603 }
5604 if (!(caller_flags & UPL_COPYOUT_FROM)) {
5605 if (!(entry->protection & VM_PROT_WRITE)) {
5606 vm_map_unlock_read(map);
5607 return KERN_PROTECTION_FAILURE;
5608 }
5609 }
5610
5611 local_object = entry->object.vm_object;
5612 if (vm_map_entry_should_cow_for_true_share(entry) &&
5613 local_object->vo_size > *upl_size &&
5614 *upl_size != 0) {
5615 vm_prot_t prot;
5616
5617 /*
5618 * Set up the targeted range for copy-on-write to avoid
5619 * applying true_share/copy_delay to the entire object.
5620 */
5621
5622 if (vm_map_lock_read_to_write(map)) {
5623 goto REDISCOVER_ENTRY;
5624 }
5625
5626 vm_map_clip_start(map,
5627 entry,
5628 vm_map_trunc_page(offset,
5629 VM_MAP_PAGE_MASK(map)));
5630 vm_map_clip_end(map,
5631 entry,
5632 vm_map_round_page(offset + *upl_size,
5633 VM_MAP_PAGE_MASK(map)));
5634 if ((entry->vme_end - offset) < *upl_size) {
5635 *upl_size = (upl_size_t) (entry->vme_end - offset);
5636 assert(*upl_size == entry->vme_end - offset);
5637 }
5638
5639 prot = entry->protection & ~VM_PROT_WRITE;
5640 if (override_nx(map, entry->alias) && prot)
5641 prot |= VM_PROT_EXECUTE;
5642 vm_object_pmap_protect(local_object,
5643 entry->offset,
5644 entry->vme_end - entry->vme_start,
5645 ((entry->is_shared || map->mapped_in_other_pmaps)
5646 ? PMAP_NULL
5647 : map->pmap),
5648 entry->vme_start,
5649 prot);
5650 entry->needs_copy = TRUE;
5651
5652 vm_map_lock_write_to_read(map);
5653 }
5654
5655 if (entry->needs_copy) {
5656 /*
5657 * Honor copy-on-write for COPY_SYMMETRIC
5658 * strategy.
5659 */
5660 vm_map_t local_map;
5661 vm_object_t object;
5662 vm_object_offset_t new_offset;
5663 vm_prot_t prot;
5664 boolean_t wired;
5665 vm_map_version_t version;
5666 vm_map_t real_map;
5667 vm_prot_t fault_type;
5668
5669 local_map = map;
5670
5671 if (caller_flags & UPL_COPYOUT_FROM) {
5672 fault_type = VM_PROT_READ | VM_PROT_COPY;
5673 vm_counters.create_upl_extra_cow++;
5674 vm_counters.create_upl_extra_cow_pages += (entry->vme_end - entry->vme_start) / PAGE_SIZE;
5675 } else {
5676 fault_type = VM_PROT_WRITE;
5677 }
5678 if (vm_map_lookup_locked(&local_map,
5679 offset, fault_type,
5680 OBJECT_LOCK_EXCLUSIVE,
5681 &version, &object,
5682 &new_offset, &prot, &wired,
5683 NULL,
5684 &real_map) != KERN_SUCCESS) {
5685 if (fault_type == VM_PROT_WRITE) {
5686 vm_counters.create_upl_lookup_failure_write++;
5687 } else {
5688 vm_counters.create_upl_lookup_failure_copy++;
5689 }
5690 vm_map_unlock_read(local_map);
5691 return KERN_FAILURE;
5692 }
5693 if (real_map != map)
5694 vm_map_unlock(real_map);
5695 vm_map_unlock_read(local_map);
5696
5697 vm_object_unlock(object);
5698
5699 goto REDISCOVER_ENTRY;
5700 }
5701
5702 if (sync_cow_data) {
5703 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
5704 local_object = entry->object.vm_object;
5705 local_start = entry->vme_start;
5706 local_offset = entry->offset;
5707
5708 vm_object_reference(local_object);
5709 vm_map_unlock_read(map);
5710
5711 if (local_object->shadow && local_object->copy) {
5712 vm_object_lock_request(
5713 local_object->shadow,
5714 (vm_object_offset_t)
5715 ((offset - local_start) +
5716 local_offset) +
5717 local_object->vo_shadow_offset,
5718 *upl_size, FALSE,
5719 MEMORY_OBJECT_DATA_SYNC,
5720 VM_PROT_NO_CHANGE);
5721 }
5722 sync_cow_data = FALSE;
5723 vm_object_deallocate(local_object);
5724
5725 goto REDISCOVER_ENTRY;
5726 }
5727 }
5728 if (force_data_sync) {
5729 local_object = entry->object.vm_object;
5730 local_start = entry->vme_start;
5731 local_offset = entry->offset;
5732
5733 vm_object_reference(local_object);
5734 vm_map_unlock_read(map);
5735
5736 vm_object_lock_request(
5737 local_object,
5738 (vm_object_offset_t)
5739 ((offset - local_start) + local_offset),
5740 (vm_object_size_t)*upl_size, FALSE,
5741 MEMORY_OBJECT_DATA_SYNC,
5742 VM_PROT_NO_CHANGE);
5743
5744 force_data_sync = FALSE;
5745 vm_object_deallocate(local_object);
5746
5747 goto REDISCOVER_ENTRY;
5748 }
5749 if (entry->object.vm_object->private)
5750 *flags = UPL_DEV_MEMORY;
5751 else
5752 *flags = 0;
5753
5754 if (entry->object.vm_object->phys_contiguous)
5755 *flags |= UPL_PHYS_CONTIG;
5756
5757 local_object = entry->object.vm_object;
5758 local_offset = entry->offset;
5759 local_start = entry->vme_start;
5760
5761 vm_object_reference(local_object);
5762 vm_map_unlock_read(map);
5763
5764 ret = vm_object_iopl_request(local_object,
5765 (vm_object_offset_t) ((offset - local_start) + local_offset),
5766 *upl_size,
5767 upl,
5768 page_list,
5769 count,
5770 caller_flags);
5771 vm_object_deallocate(local_object);
5772
5773 return(ret);
5774 }
5775 vm_map_unlock_read(map);
5776
5777 return(KERN_FAILURE);
5778 }
5779
5780 /*
5781 * Internal routine to enter a UPL into a VM map.
5782 *
5783 * JMM - This should just be doable through the standard
5784 * vm_map_enter() API.
5785 */
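/*
 * Rough usage sketch (illustrative only): a kernel client that needs a
 * CPU-addressable window onto a UPL pairs this routine with
 * vm_map_remove_upl(), e.g.
 *
 *	vm_map_offset_t kaddr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	... access the pages through kaddr ...
 *	kr = vm_map_remove_upl(kernel_map, upl);
 *
 * The mapping takes an extra reference on the UPL (ref_count++ below),
 * which vm_map_remove_upl() releases.
 */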
5786 kern_return_t
5787 vm_map_enter_upl(
5788 vm_map_t map,
5789 upl_t upl,
5790 vm_map_offset_t *dst_addr)
5791 {
5792 vm_map_size_t size;
5793 vm_object_offset_t offset;
5794 vm_map_offset_t addr;
5795 vm_page_t m;
5796 kern_return_t kr;
5797 int isVectorUPL = 0, curr_upl=0;
5798 upl_t vector_upl = NULL;
5799 vm_offset_t vector_upl_dst_addr = 0;
5800 vm_map_t vector_upl_submap = NULL;
5801 upl_offset_t subupl_offset = 0;
5802 upl_size_t subupl_size = 0;
5803
5804 if (upl == UPL_NULL)
5805 return KERN_INVALID_ARGUMENT;
5806
5807 if((isVectorUPL = vector_upl_is_valid(upl))) {
5808 int mapped=0,valid_upls=0;
5809 vector_upl = upl;
5810
5811 upl_lock(vector_upl);
5812 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
5813 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
5814 if(upl == NULL)
5815 continue;
5816 valid_upls++;
5817 if (UPL_PAGE_LIST_MAPPED & upl->flags)
5818 mapped++;
5819 }
5820
5821 if(mapped) {
5822 if(mapped != valid_upls)
5823 panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
5824 else {
5825 upl_unlock(vector_upl);
5826 return KERN_FAILURE;
5827 }
5828 }
5829
5830 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
5831 if( kr != KERN_SUCCESS )
5832 panic("Vector UPL submap allocation failed\n");
5833 map = vector_upl_submap;
5834 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
5835 curr_upl=0;
5836 }
5837 else
5838 upl_lock(upl);
5839
5840 process_upl_to_enter:
5841 if(isVectorUPL){
5842 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
5843 *dst_addr = vector_upl_dst_addr;
5844 upl_unlock(vector_upl);
5845 return KERN_SUCCESS;
5846 }
5847 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
5848 if(upl == NULL)
5849 goto process_upl_to_enter;
5850
5851 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
5852 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
5853 } else {
5854 /*
5855 * check to see if already mapped
5856 */
5857 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
5858 upl_unlock(upl);
5859 return KERN_FAILURE;
5860 }
5861 }
5862 if ((!(upl->flags & UPL_SHADOWED)) &&
5863 ((upl->flags & UPL_HAS_BUSY) ||
5864 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
5865
5866 vm_object_t object;
5867 vm_page_t alias_page;
5868 vm_object_offset_t new_offset;
5869 unsigned int pg_num;
5870 wpl_array_t lite_list;
5871
5872 if (upl->flags & UPL_INTERNAL) {
5873 lite_list = (wpl_array_t)
5874 ((((uintptr_t)upl) + sizeof(struct upl))
5875 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5876 } else {
5877 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
5878 }
5879 object = upl->map_object;
5880 upl->map_object = vm_object_allocate(upl->size);
5881
5882 vm_object_lock(upl->map_object);
5883
5884 upl->map_object->shadow = object;
5885 upl->map_object->pageout = TRUE;
5886 upl->map_object->can_persist = FALSE;
5887 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5888 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
5889 upl->map_object->wimg_bits = object->wimg_bits;
5890 offset = upl->map_object->vo_shadow_offset;
5891 new_offset = 0;
5892 size = upl->size;
5893
5894 upl->flags |= UPL_SHADOWED;
5895
5896 while (size) {
5897 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
5898 assert(pg_num == new_offset / PAGE_SIZE);
5899
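/*
 * lite_list is a bitmap with one bit per page of the UPL: word
 * pg_num >> 5 carries the bit for page pg_num (bit pg_num & 31), and a
 * set bit means that page is covered by this lite UPL and needs an
 * alias page.
 */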
5900 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
5901
5902 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5903
5904 vm_object_lock(object);
5905
5906 m = vm_page_lookup(object, offset);
5907 if (m == VM_PAGE_NULL) {
5908 panic("vm_upl_map: page missing\n");
5909 }
5910
5911 /*
5912 * Convert the fictitious page to a private
5913 * shadow of the real page.
5914 */
5915 assert(alias_page->fictitious);
5916 alias_page->fictitious = FALSE;
5917 alias_page->private = TRUE;
5918 alias_page->pageout = TRUE;
5919 /*
5920 * since m is a page in the upl it must
5921 * already be wired or BUSY, so it's
5922 * safe to assign the underlying physical
5923 * page to the alias
5924 */
5925 alias_page->phys_page = m->phys_page;
5926
5927 vm_object_unlock(object);
5928
5929 vm_page_lockspin_queues();
5930 vm_page_wire(alias_page);
5931 vm_page_unlock_queues();
5932
5933 /*
5934 * ENCRYPTED SWAP:
5935 * The virtual page ("m") has to be wired in some way
5936 * here or its physical page ("m->phys_page") could
5937 * be recycled at any time.
5938 * Assuming this is enforced by the caller, we can't
5939 * get an encrypted page here. Since the encryption
5940 * key depends on the VM page's "pager" object and
5941 * the "paging_offset", we couldn't handle 2 pageable
5942 * VM pages (with different pagers and paging_offsets)
5943 * sharing the same physical page: we could end up
5944 * encrypting with one key (via one VM page) and
5945 * decrypting with another key (via the alias VM page).
5946 */
5947 ASSERT_PAGE_DECRYPTED(m);
5948
5949 vm_page_insert(alias_page, upl->map_object, new_offset);
5950
5951 assert(!alias_page->wanted);
5952 alias_page->busy = FALSE;
5953 alias_page->absent = FALSE;
5954 }
5955 size -= PAGE_SIZE;
5956 offset += PAGE_SIZE_64;
5957 new_offset += PAGE_SIZE_64;
5958 }
5959 vm_object_unlock(upl->map_object);
5960 }
5961 if (upl->flags & UPL_SHADOWED)
5962 offset = 0;
5963 else
5964 offset = upl->offset - upl->map_object->paging_offset;
5965
5966 size = upl->size;
5967
5968 vm_object_reference(upl->map_object);
5969
5970 if(!isVectorUPL) {
5971 *dst_addr = 0;
5972 /*
5973 * NEED A UPL_MAP ALIAS
5974 */
5975 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
5976 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
5977 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
5978
5979 if (kr != KERN_SUCCESS) {
5980 upl_unlock(upl);
5981 return(kr);
5982 }
5983 }
5984 else {
5985 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
5986 VM_FLAGS_FIXED, upl->map_object, offset, FALSE,
5987 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
5988 if(kr)
5989 panic("vm_map_enter failed for a Vector UPL\n");
5990 }
5991 vm_object_lock(upl->map_object);
5992
5993 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
5994 m = vm_page_lookup(upl->map_object, offset);
5995
5996 if (m) {
5997 m->pmapped = TRUE;
5998
5999 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6000 * but only in kernel space. If this was on a user map,
6001 * we'd have to set the wpmapped bit. */
6002 /* m->wpmapped = TRUE; */
6003 assert(map->pmap == kernel_pmap);
6004
6005 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE);
6006 }
6007 offset += PAGE_SIZE_64;
6008 }
6009 vm_object_unlock(upl->map_object);
6010
6011 /*
6012 * hold a reference for the mapping
6013 */
6014 upl->ref_count++;
6015 upl->flags |= UPL_PAGE_LIST_MAPPED;
6016 upl->kaddr = (vm_offset_t) *dst_addr;
6017 assert(upl->kaddr == *dst_addr);
6018
6019 if(isVectorUPL)
6020 goto process_upl_to_enter;
6021
6022 upl_unlock(upl);
6023
6024 return KERN_SUCCESS;
6025 }
6026
6027 /*
6028 * Internal routine to remove a UPL mapping from a VM map.
6029 *
6030 * XXX - This should just be doable through a standard
6031 * vm_map_remove() operation. Otherwise, implicit clean-up
6032 * of the target map won't be able to correctly remove
6033 * these (and release the reference on the UPL). Having
6034 * to do this means we can't map these into user-space
6035 * maps yet.
6036 */
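/*
 * For a vector UPL, the individual sub-UPL mappings all live in a
 * dedicated submap set up by vm_map_enter_upl(), so the teardown below
 * removes that submap in one shot instead of unmapping each element.
 */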
6037 kern_return_t
6038 vm_map_remove_upl(
6039 vm_map_t map,
6040 upl_t upl)
6041 {
6042 vm_address_t addr;
6043 upl_size_t size;
6044 int isVectorUPL = 0, curr_upl = 0;
6045 upl_t vector_upl = NULL;
6046
6047 if (upl == UPL_NULL)
6048 return KERN_INVALID_ARGUMENT;
6049
6050 if((isVectorUPL = vector_upl_is_valid(upl))) {
6051 int unmapped=0, valid_upls=0;
6052 vector_upl = upl;
6053 upl_lock(vector_upl);
6054 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6055 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6056 if(upl == NULL)
6057 continue;
6058 valid_upls++;
6059 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6060 unmapped++;
6061 }
6062
6063 if(unmapped) {
6064 if(unmapped != valid_upls)
6065 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6066 else {
6067 upl_unlock(vector_upl);
6068 return KERN_FAILURE;
6069 }
6070 }
6071 curr_upl=0;
6072 }
6073 else
6074 upl_lock(upl);
6075
6076 process_upl_to_remove:
6077 if(isVectorUPL) {
6078 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6079 vm_map_t v_upl_submap;
6080 vm_offset_t v_upl_submap_dst_addr;
6081 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6082
6083 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6084 vm_map_deallocate(v_upl_submap);
6085 upl_unlock(vector_upl);
6086 return KERN_SUCCESS;
6087 }
6088
6089 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6090 if(upl == NULL)
6091 goto process_upl_to_remove;
6092 }
6093
6094 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6095 addr = upl->kaddr;
6096 size = upl->size;
6097
6098 assert(upl->ref_count > 1);
6099 upl->ref_count--; /* removing mapping ref */
6100
6101 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6102 upl->kaddr = (vm_offset_t) 0;
6103
6104 if(!isVectorUPL) {
6105 upl_unlock(upl);
6106
6107 vm_map_remove(
6108 map,
6109 vm_map_trunc_page(addr,
6110 VM_MAP_PAGE_MASK(map)),
6111 vm_map_round_page(addr + size,
6112 VM_MAP_PAGE_MASK(map)),
6113 VM_MAP_NO_FLAGS);
6114
6115 return KERN_SUCCESS;
6116 }
6117 else {
6118 /*
6119 * If it's a Vectored UPL, we'll be removing the entire
6120 * submap anyway, so no need to remove individual UPL
6121 * element mappings from within the submap
6122 */
6123 goto process_upl_to_remove;
6124 }
6125 }
6126 upl_unlock(upl);
6127
6128 return KERN_FAILURE;
6129 }
6130
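/*
 * Routine:	upl_commit_range
 * Purpose:
 *	Commit a byte range of a UPL back to its underlying object:
 *	update each page's dirty/clean, wired and queue state according
 *	to the commit flags, release the busy/cleaning state taken when
 *	the UPL was created, and set *empty when no pages remain in the
 *	UPL.  Vector UPLs are processed one sub-UPL at a time.
 */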
6131 kern_return_t
6132 upl_commit_range(
6133 upl_t upl,
6134 upl_offset_t offset,
6135 upl_size_t size,
6136 int flags,
6137 upl_page_info_t *page_list,
6138 mach_msg_type_number_t count,
6139 boolean_t *empty)
6140 {
6141 upl_size_t xfer_size, subupl_size = size;
6142 vm_object_t shadow_object;
6143 vm_object_t object;
6144 vm_object_offset_t target_offset;
6145 upl_offset_t subupl_offset = offset;
6146 int entry;
6147 wpl_array_t lite_list;
6148 int occupied;
6149 int clear_refmod = 0;
6150 int pgpgout_count = 0;
6151 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6152 struct vm_page_delayed_work *dwp;
6153 int dw_count;
6154 int dw_limit;
6155 int isVectorUPL = 0;
6156 upl_t vector_upl = NULL;
6157 boolean_t should_be_throttled = FALSE;
6158
6159 vm_page_t nxt_page = VM_PAGE_NULL;
6160 int fast_path_possible = 0;
6161 int fast_path_full_commit = 0;
6162 int throttle_page = 0;
6163 int unwired_count = 0;
6164 int local_queue_count = 0;
6165 queue_head_t local_queue;
6166
6167 *empty = FALSE;
6168
6169 if (upl == UPL_NULL)
6170 return KERN_INVALID_ARGUMENT;
6171
6172 if (count == 0)
6173 page_list = NULL;
6174
6175 if((isVectorUPL = vector_upl_is_valid(upl))) {
6176 vector_upl = upl;
6177 upl_lock(vector_upl);
6178 }
6179 else
6180 upl_lock(upl);
6181
6182 process_upl_to_commit:
6183
6184 if(isVectorUPL) {
6185 size = subupl_size;
6186 offset = subupl_offset;
6187 if(size == 0) {
6188 upl_unlock(vector_upl);
6189 return KERN_SUCCESS;
6190 }
6191 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6192 if(upl == NULL) {
6193 upl_unlock(vector_upl);
6194 return KERN_FAILURE;
6195 }
6196 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6197 subupl_size -= size;
6198 subupl_offset += size;
6199 }
6200
6201 #if UPL_DEBUG
6202 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6203 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6204
6205 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6206 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6207
6208 upl->upl_commit_index++;
6209 }
6210 #endif
6211 if (upl->flags & UPL_DEVICE_MEMORY)
6212 xfer_size = 0;
6213 else if ((offset + size) <= upl->size)
6214 xfer_size = size;
6215 else {
6216 if(!isVectorUPL)
6217 upl_unlock(upl);
6218 else {
6219 upl_unlock(vector_upl);
6220 }
6221 return KERN_FAILURE;
6222 }
6223 if (upl->flags & UPL_SET_DIRTY)
6224 flags |= UPL_COMMIT_SET_DIRTY;
6225 if (upl->flags & UPL_CLEAR_DIRTY)
6226 flags |= UPL_COMMIT_CLEAR_DIRTY;
6227
6228 if (upl->flags & UPL_INTERNAL)
6229 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6230 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6231 else
6232 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6233
6234 object = upl->map_object;
6235
6236 if (upl->flags & UPL_SHADOWED) {
6237 vm_object_lock(object);
6238 shadow_object = object->shadow;
6239 } else {
6240 shadow_object = object;
6241 }
6242 entry = offset/PAGE_SIZE;
6243 target_offset = (vm_object_offset_t)offset;
6244
6245 if (upl->flags & UPL_KERNEL_OBJECT)
6246 vm_object_lock_shared(shadow_object);
6247 else
6248 vm_object_lock(shadow_object);
6249
6250 if (upl->flags & UPL_ACCESS_BLOCKED) {
6251 assert(shadow_object->blocked_access);
6252 shadow_object->blocked_access = FALSE;
6253 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6254 }
6255
6256 if (shadow_object->code_signed) {
6257 /*
6258 * CODE SIGNING:
6259 * If the object is code-signed, do not let this UPL tell
6260 * us if the pages are valid or not. Let the pages be
6261 * validated by VM the normal way (when they get mapped or
6262 * copied).
6263 */
6264 flags &= ~UPL_COMMIT_CS_VALIDATED;
6265 }
6266 if (! page_list) {
6267 /*
6268 * No page list to get the code-signing info from !?
6269 */
6270 flags &= ~UPL_COMMIT_CS_VALIDATED;
6271 }
6272 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal)
6273 should_be_throttled = TRUE;
6274
6275 dwp = &dw_array[0];
6276 dw_count = 0;
6277 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6278
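/*
 * Fast-path check: a plain (non-vector) IO-wire commit against a
 * non-volatile object can walk the object's resident page list
 * directly and batch the unwire/queue updates on a local queue,
 * rather than doing a per-page lookup and delayed-work call for
 * each page.
 */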
6279 if ((upl->flags & UPL_IO_WIRE) &&
6280 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6281 !isVectorUPL &&
6282 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6283 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6284
6285 if (!queue_empty(&shadow_object->memq)) {
6286 queue_init(&local_queue);
6287 if (size == shadow_object->vo_size) {
6288 nxt_page = (vm_page_t)queue_first(&shadow_object->memq);
6289 fast_path_full_commit = 1;
6290 }
6291 fast_path_possible = 1;
6292
6293 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal &&
6294 (shadow_object->purgable == VM_PURGABLE_DENY ||
6295 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6296 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6297 throttle_page = 1;
6298 }
6299 }
6300 }
6301
6302 while (xfer_size) {
6303 vm_page_t t, m;
6304
6305 dwp->dw_mask = 0;
6306 clear_refmod = 0;
6307
6308 m = VM_PAGE_NULL;
6309
6310 if (upl->flags & UPL_LITE) {
6311 unsigned int pg_num;
6312
6313 if (nxt_page != VM_PAGE_NULL) {
6314 m = nxt_page;
6315 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
6316 target_offset = m->offset;
6317 }
6318 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6319 assert(pg_num == target_offset/PAGE_SIZE);
6320
6321 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6322 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6323
6324 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6325 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6326 } else
6327 m = NULL;
6328 }
6329 if (upl->flags & UPL_SHADOWED) {
6330 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6331
6332 t->pageout = FALSE;
6333
6334 VM_PAGE_FREE(t);
6335
6336 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6337 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6338 }
6339 }
6340 if (m == VM_PAGE_NULL)
6341 goto commit_next_page;
6342
6343 if (m->compressor) {
6344 assert(m->busy);
6345
6346 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6347 goto commit_next_page;
6348 }
6349
6350 if (flags & UPL_COMMIT_CS_VALIDATED) {
6351 /*
6352 * CODE SIGNING:
6353 * Set the code signing bits according to
6354 * what the UPL says they should be.
6355 */
6356 m->cs_validated = page_list[entry].cs_validated;
6357 m->cs_tainted = page_list[entry].cs_tainted;
6358 }
6359 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6360 m->written_by_kernel = TRUE;
6361
6362 if (upl->flags & UPL_IO_WIRE) {
6363
6364 if (page_list)
6365 page_list[entry].phys_addr = 0;
6366
6367 if (flags & UPL_COMMIT_SET_DIRTY) {
6368 SET_PAGE_DIRTY(m, FALSE);
6369 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6370 m->dirty = FALSE;
6371
6372 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6373 m->cs_validated && !m->cs_tainted) {
6374 /*
6375 * CODE SIGNING:
6376 * This page is no longer dirty
6377 * but could have been modified,
6378 * so it will need to be
6379 * re-validated.
6380 */
6381 if (m->slid) {
6382 panic("upl_commit_range(%p): page %p was slid\n",
6383 upl, m);
6384 }
6385 assert(!m->slid);
6386 m->cs_validated = FALSE;
6387 #if DEVELOPMENT || DEBUG
6388 vm_cs_validated_resets++;
6389 #endif
6390 pmap_disconnect(m->phys_page);
6391 }
6392 clear_refmod |= VM_MEM_MODIFIED;
6393 }
6394 if (upl->flags & UPL_ACCESS_BLOCKED) {
6395 /*
6396 * We blocked access to the pages in this UPL.
6397 * Clear the "busy" bit and wake up any waiter
6398 * for this page.
6399 */
6400 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6401 }
6402 if (fast_path_possible) {
6403 assert(m->object->purgable != VM_PURGABLE_EMPTY);
6404 assert(m->object->purgable != VM_PURGABLE_VOLATILE);
6405 if (m->absent) {
6406 assert(m->wire_count == 0);
6407 assert(m->busy);
6408
6409 m->absent = FALSE;
6410 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6411 } else {
6412 if (m->wire_count == 0)
6413 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6414
6415 /*
6416 * XXX FBDP need to update some other
6417 * counters here (purgeable_wired_count)
6418 * (ledgers), ...
6419 */
6420 assert(m->wire_count);
6421 m->wire_count--;
6422
6423 if (m->wire_count == 0)
6424 unwired_count++;
6425 }
6426 if (m->wire_count == 0) {
6427 queue_enter(&local_queue, m, vm_page_t, pageq);
6428 local_queue_count++;
6429
6430 if (throttle_page) {
6431 m->throttled = TRUE;
6432 } else {
6433 if (flags & UPL_COMMIT_INACTIVATE)
6434 m->inactive = TRUE;
6435 else
6436 m->active = TRUE;
6437 }
6438 }
6439 } else {
6440 if (flags & UPL_COMMIT_INACTIVATE) {
6441 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6442 clear_refmod |= VM_MEM_REFERENCED;
6443 }
6444 if (m->absent) {
6445 if (flags & UPL_COMMIT_FREE_ABSENT)
6446 dwp->dw_mask |= DW_vm_page_free;
6447 else {
6448 m->absent = FALSE;
6449 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6450
6451 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6452 dwp->dw_mask |= DW_vm_page_activate;
6453 }
6454 } else
6455 dwp->dw_mask |= DW_vm_page_unwire;
6456 }
6457 goto commit_next_page;
6458 }
6459 assert(!m->compressor);
6460
6461 if (page_list)
6462 page_list[entry].phys_addr = 0;
6463
6464 /*
6465 * make sure to clear the hardware
6466 * modify or reference bits before
6467 * releasing the BUSY bit on this page
6468 * otherwise we risk losing a legitimate
6469 * change of state
6470 */
6471 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6472 m->dirty = FALSE;
6473
6474 clear_refmod |= VM_MEM_MODIFIED;
6475 }
6476 if (m->laundry)
6477 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6478
6479 if (VM_PAGE_WIRED(m))
6480 m->pageout = FALSE;
6481
6482 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6483 m->cs_validated && !m->cs_tainted) {
6484 /*
6485 * CODE SIGNING:
6486 * This page is no longer dirty
6487 * but could have been modified,
6488 * so it will need to be
6489 * re-validated.
6490 */
6491 if (m->slid) {
6492 panic("upl_commit_range(%p): page %p was slid\n",
6493 upl, m);
6494 }
6495 assert(!m->slid);
6496 m->cs_validated = FALSE;
6497 #if DEVELOPMENT || DEBUG
6498 vm_cs_validated_resets++;
6499 #endif
6500 pmap_disconnect(m->phys_page);
6501 }
6502 if (m->overwriting) {
6503 /*
6504 * the (COPY_OUT_FROM == FALSE) request_page_list case
6505 */
6506 if (m->busy) {
6507 #if CONFIG_PHANTOM_CACHE
6508 if (m->absent && !m->object->internal)
6509 dwp->dw_mask |= DW_vm_phantom_cache_update;
6510 #endif
6511 m->absent = FALSE;
6512
6513 dwp->dw_mask |= DW_clear_busy;
6514 } else {
6515 /*
6516 * alternate (COPY_OUT_FROM == FALSE) page_list case
6517 * Occurs when the original page was wired
6518 * at the time of the list request
6519 */
6520 assert(VM_PAGE_WIRED(m));
6521
6522 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
6523 }
6524 m->overwriting = FALSE;
6525 }
6526 if (m->encrypted_cleaning == TRUE) {
6527 m->encrypted_cleaning = FALSE;
6528
6529 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP;
6530 }
6531 m->cleaning = FALSE;
6532
6533 if (m->pageout) {
6534 /*
6535 * With the clean queue enabled, UPL_PAGEOUT should
6536 * no longer set the pageout bit. Its pages now go
6537 * to the clean queue.
6538 */
6539 assert(!(flags & UPL_PAGEOUT));
6540
6541 m->pageout = FALSE;
6542 #if MACH_CLUSTER_STATS
6543 if (m->wanted) vm_pageout_target_collisions++;
6544 #endif
6545 if ((flags & UPL_COMMIT_SET_DIRTY) ||
6546 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) {
6547 /*
6548 * page was re-dirtied after we started
6549 * the pageout... reactivate it since
6550 * we don't know whether the on-disk
6551 * copy matches what is now in memory
6552 */
6553 SET_PAGE_DIRTY(m, FALSE);
6554
6555 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
6556
6557 if (upl->flags & UPL_PAGEOUT) {
6558 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
6559 VM_STAT_INCR(reactivations);
6560 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
6561 }
6562 } else {
6563 /*
6564 * page has been successfully cleaned
6565 * go ahead and free it for other use
6566 */
6567 if (m->object->internal) {
6568 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
6569 } else {
6570 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
6571 }
6572 m->dirty = FALSE;
6573 m->busy = TRUE;
6574
6575 dwp->dw_mask |= DW_vm_page_free;
6576 }
6577 goto commit_next_page;
6578 }
6579 #if MACH_CLUSTER_STATS
6580 if (m->wpmapped)
6581 m->dirty = pmap_is_modified(m->phys_page);
6582
6583 if (m->dirty) vm_pageout_cluster_dirtied++;
6584 else vm_pageout_cluster_cleaned++;
6585 if (m->wanted) vm_pageout_cluster_collisions++;
6586 #endif
6587 /*
6588 * It is a part of the semantic of COPYOUT_FROM
6589 * UPLs that a commit implies cache sync
6590 * between the vm page and the backing store
6591 * this can be used to strip the precious bit
6592 * as well as clean
6593 */
6594 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6595 m->precious = FALSE;
6596
6597 if (flags & UPL_COMMIT_SET_DIRTY) {
6598 SET_PAGE_DIRTY(m, FALSE);
6599 } else {
6600 m->dirty = FALSE;
6601 }
6602
6603 /* with the clean queue on, move *all* cleaned pages to the clean queue */
6604 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
6605 pgpgout_count++;
6606
6607 VM_STAT_INCR(pageouts);
6608 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
6609
6610 dwp->dw_mask |= DW_enqueue_cleaned;
6611 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
6612 } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
6613 /*
6614 * page coming back in from being 'frozen'...
6615 * it was dirty before it was frozen, so keep it dirty so that
6616 * vm_page_activate will notice that it really belongs
6617 * on the throttle queue and put it there
6618 */
6619 SET_PAGE_DIRTY(m, FALSE);
6620 dwp->dw_mask |= DW_vm_page_activate;
6621
6622 } else {
6623 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
6624 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6625 clear_refmod |= VM_MEM_REFERENCED;
6626 } else if (!m->active && !m->inactive && !m->speculative) {
6627
6628 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
6629 dwp->dw_mask |= DW_vm_page_speculate;
6630 else if (m->reference)
6631 dwp->dw_mask |= DW_vm_page_activate;
6632 else {
6633 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6634 clear_refmod |= VM_MEM_REFERENCED;
6635 }
6636 }
6637 }
6638 if (upl->flags & UPL_ACCESS_BLOCKED) {
6639 /*
6640 * We blocked access to the pages in this UPL.
6641 * Clear the "busy" bit on this page before we
6642 * wake up any waiter.
6643 */
6644 dwp->dw_mask |= DW_clear_busy;
6645 }
6646 /*
6647 * Wakeup any thread waiting for the page to be un-cleaning.
6648 */
6649 dwp->dw_mask |= DW_PAGE_WAKEUP;
6650
6651 commit_next_page:
6652 if (clear_refmod)
6653 pmap_clear_refmod(m->phys_page, clear_refmod);
6654
6655 target_offset += PAGE_SIZE_64;
6656 xfer_size -= PAGE_SIZE;
6657 entry++;
6658
6659 if (dwp->dw_mask) {
6660 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
6661 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
6662
6663 if (dw_count >= dw_limit) {
6664 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
6665
6666 dwp = &dw_array[0];
6667 dw_count = 0;
6668 }
6669 } else {
6670 if (dwp->dw_mask & DW_clear_busy)
6671 m->busy = FALSE;
6672
6673 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6674 PAGE_WAKEUP(m);
6675 }
6676 }
6677 }
6678 if (dw_count)
6679 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
6680
6681 if (fast_path_possible) {
6682
6683 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
6684 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
6685
6686 if (local_queue_count || unwired_count) {
6687
6688 if (local_queue_count) {
6689 vm_page_t first_local, last_local;
6690 vm_page_t first_target;
6691 queue_head_t *target_queue;
6692
6693 if (throttle_page)
6694 target_queue = &vm_page_queue_throttled;
6695 else {
6696 if (flags & UPL_COMMIT_INACTIVATE) {
6697 if (shadow_object->internal)
6698 target_queue = &vm_page_queue_anonymous;
6699 else
6700 target_queue = &vm_page_queue_inactive;
6701 } else
6702 target_queue = &vm_page_queue_active;
6703 }
6704 /*
6705 * Transfer the entire local queue to the appropriate LRU page queue.
6706 */
6707 first_local = (vm_page_t) queue_first(&local_queue);
6708 last_local = (vm_page_t) queue_last(&local_queue);
6709
6710 vm_page_lockspin_queues();
6711
6712 first_target = (vm_page_t) queue_first(target_queue);
6713
6714 if (queue_empty(target_queue))
6715 queue_last(target_queue) = (queue_entry_t) last_local;
6716 else
6717 queue_prev(&first_target->pageq) = (queue_entry_t) last_local;
6718
6719 queue_first(target_queue) = (queue_entry_t) first_local;
6720 queue_prev(&first_local->pageq) = (queue_entry_t) target_queue;
6721 queue_next(&last_local->pageq) = (queue_entry_t) first_target;
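/*
 * The pointer assignments above splice the whole local list onto the
 * head of target_queue in constant time: first_local becomes the new
 * head and the queue's old first element now follows last_local.
 */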
6722
6723 /*
6724 * Adjust the global page counts.
6725 */
6726 if (throttle_page) {
6727 vm_page_throttled_count += local_queue_count;
6728 } else {
6729 if (flags & UPL_COMMIT_INACTIVATE) {
6730 if (shadow_object->internal)
6731 vm_page_anonymous_count += local_queue_count;
6732 vm_page_inactive_count += local_queue_count;
6733
6734 token_new_pagecount += local_queue_count;
6735 } else
6736 vm_page_active_count += local_queue_count;
6737
6738 if (shadow_object->internal)
6739 vm_page_pageable_internal_count += local_queue_count;
6740 else
6741 vm_page_pageable_external_count += local_queue_count;
6742 }
6743 } else {
6744 vm_page_lockspin_queues();
6745 }
6746 if (unwired_count) {
6747 vm_page_wire_count -= unwired_count;
6748 VM_CHECK_MEMORYSTATUS;
6749 }
6750 vm_page_unlock_queues();
6751
6752 shadow_object->wired_page_count -= unwired_count;
6753 }
6754 }
6755 occupied = 1;
6756
6757 if (upl->flags & UPL_DEVICE_MEMORY) {
6758 occupied = 0;
6759 } else if (upl->flags & UPL_LITE) {
6760 int pg_num;
6761 int i;
6762
6763 occupied = 0;
6764
6765 if (!fast_path_full_commit) {
6766 pg_num = upl->size/PAGE_SIZE;
6767 pg_num = (pg_num + 31) >> 5;
6768
6769 for (i = 0; i < pg_num; i++) {
6770 if (lite_list[i] != 0) {
6771 occupied = 1;
6772 break;
6773 }
6774 }
6775 }
6776 } else {
6777 if (queue_empty(&upl->map_object->memq))
6778 occupied = 0;
6779 }
6780 if (occupied == 0) {
6781 /*
6782 * If this UPL element belongs to a Vector UPL and is
6783 * empty, then this is the right function to deallocate
6784 * it. So go ahead and set the *empty variable. The flag
6785 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
6786 * should be considered relevant for the Vector UPL and not
6787 * the internal UPLs.
6788 */
6789 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
6790 *empty = TRUE;
6791
6792 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
6793 /*
6794 * this is not a paging object
6795 * so we need to drop the paging reference
6796 * that was taken when we created the UPL
6797 * against this object
6798 */
6799 vm_object_activity_end(shadow_object);
6800 vm_object_collapse(shadow_object, 0, TRUE);
6801 } else {
6802 /*
6803 * we donated the paging reference to
6804 * the map object... vm_pageout_object_terminate
6805 * will drop this reference
6806 */
6807 }
6808 }
6809 vm_object_unlock(shadow_object);
6810 if (object != shadow_object)
6811 vm_object_unlock(object);
6812
6813 if(!isVectorUPL)
6814 upl_unlock(upl);
6815 else {
6816 /*
6817 * If we completed our operations on an UPL that is
6818 * part of a Vectored UPL and if empty is TRUE, then
6819 * we should go ahead and deallocate this UPL element.
6820 * Then we check if this was the last of the UPL elements
6821 * within that Vectored UPL. If so, set empty to TRUE
6822 * so that in ubc_upl_commit_range or ubc_upl_commit, we
6823 * can go ahead and deallocate the Vector UPL too.
6824 */
6825 if(*empty==TRUE) {
6826 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
6827 upl_deallocate(upl);
6828 }
6829 goto process_upl_to_commit;
6830 }
6831
6832 if (pgpgout_count) {
6833 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
6834 }
6835
6836 return KERN_SUCCESS;
6837 }
6838
6839 kern_return_t
6840 upl_abort_range(
6841 upl_t upl,
6842 upl_offset_t offset,
6843 upl_size_t size,
6844 int error,
6845 boolean_t *empty)
6846 {
6847 upl_page_info_t *user_page_list = NULL;
6848 upl_size_t xfer_size, subupl_size = size;
6849 vm_object_t shadow_object;
6850 vm_object_t object;
6851 vm_object_offset_t target_offset;
6852 upl_offset_t subupl_offset = offset;
6853 int entry;
6854 wpl_array_t lite_list;
6855 int occupied;
6856 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6857 struct vm_page_delayed_work *dwp;
6858 int dw_count;
6859 int dw_limit;
6860 int isVectorUPL = 0;
6861 upl_t vector_upl = NULL;
6862
6863 *empty = FALSE;
6864
6865 if (upl == UPL_NULL)
6866 return KERN_INVALID_ARGUMENT;
6867
6868 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
6869 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
6870
6871 if((isVectorUPL = vector_upl_is_valid(upl))) {
6872 vector_upl = upl;
6873 upl_lock(vector_upl);
6874 }
6875 else
6876 upl_lock(upl);
6877
6878 process_upl_to_abort:
6879 if(isVectorUPL) {
6880 size = subupl_size;
6881 offset = subupl_offset;
6882 if(size == 0) {
6883 upl_unlock(vector_upl);
6884 return KERN_SUCCESS;
6885 }
6886 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6887 if(upl == NULL) {
6888 upl_unlock(vector_upl);
6889 return KERN_FAILURE;
6890 }
6891 subupl_size -= size;
6892 subupl_offset += size;
6893 }
6894
6895 *empty = FALSE;
6896
6897 #if UPL_DEBUG
6898 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6899 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6900
6901 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6902 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6903 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
6904
6905 upl->upl_commit_index++;
6906 }
6907 #endif
6908 if (upl->flags & UPL_DEVICE_MEMORY)
6909 xfer_size = 0;
6910 else if ((offset + size) <= upl->size)
6911 xfer_size = size;
6912 else {
6913 if(!isVectorUPL)
6914 upl_unlock(upl);
6915 else {
6916 upl_unlock(vector_upl);
6917 }
6918
6919 return KERN_FAILURE;
6920 }
6921 if (upl->flags & UPL_INTERNAL) {
6922 lite_list = (wpl_array_t)
6923 ((((uintptr_t)upl) + sizeof(struct upl))
6924 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6925
6926 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
6927 } else {
6928 lite_list = (wpl_array_t)
6929 (((uintptr_t)upl) + sizeof(struct upl));
6930 }
6931 object = upl->map_object;
6932
6933 if (upl->flags & UPL_SHADOWED) {
6934 vm_object_lock(object);
6935 shadow_object = object->shadow;
6936 } else
6937 shadow_object = object;
6938
6939 entry = offset/PAGE_SIZE;
6940 target_offset = (vm_object_offset_t)offset;
6941
6942 if (upl->flags & UPL_KERNEL_OBJECT)
6943 vm_object_lock_shared(shadow_object);
6944 else
6945 vm_object_lock(shadow_object);
6946
6947 if (upl->flags & UPL_ACCESS_BLOCKED) {
6948 assert(shadow_object->blocked_access);
6949 shadow_object->blocked_access = FALSE;
6950 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6951 }
6952
6953 dwp = &dw_array[0];
6954 dw_count = 0;
6955 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6956
6957 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
6958 panic("upl_abort_range: kernel_object being DUMPED");
6959
6960 while (xfer_size) {
6961 vm_page_t t, m;
6962 unsigned int pg_num;
6963 boolean_t needed;
6964
6965 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6966 assert(pg_num == target_offset/PAGE_SIZE);
6967
6968 needed = FALSE;
6969
6970 if (user_page_list)
6971 needed = user_page_list[pg_num].needed;
6972
6973 dwp->dw_mask = 0;
6974 m = VM_PAGE_NULL;
6975
6976 if (upl->flags & UPL_LITE) {
6977
6978 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6979 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6980
6981 if ( !(upl->flags & UPL_KERNEL_OBJECT))
6982 m = vm_page_lookup(shadow_object, target_offset +
6983 (upl->offset - shadow_object->paging_offset));
6984 }
6985 }
6986 if (upl->flags & UPL_SHADOWED) {
6987 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6988 t->pageout = FALSE;
6989
6990 VM_PAGE_FREE(t);
6991
6992 if (m == VM_PAGE_NULL)
6993 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6994 }
6995 }
6996 if ((upl->flags & UPL_KERNEL_OBJECT))
6997 goto abort_next_page;
6998
6999 if (m != VM_PAGE_NULL) {
7000
7001 assert(!m->compressor);
7002
7003 if (m->absent) {
7004 boolean_t must_free = TRUE;
7005
7006 /*
7007 * COPYOUT = FALSE case
7008 * check for error conditions which must
7009 * be passed back to the pages' customer
7010 */
7011 if (error & UPL_ABORT_RESTART) {
7012 m->restart = TRUE;
7013 m->absent = FALSE;
7014 m->unusual = TRUE;
7015 must_free = FALSE;
7016 } else if (error & UPL_ABORT_UNAVAILABLE) {
7017 m->restart = FALSE;
7018 m->unusual = TRUE;
7019 must_free = FALSE;
7020 } else if (error & UPL_ABORT_ERROR) {
7021 m->restart = FALSE;
7022 m->absent = FALSE;
7023 m->error = TRUE;
7024 m->unusual = TRUE;
7025 must_free = FALSE;
7026 }
7027 if (m->clustered && needed == FALSE) {
7028 /*
7029 * This page was a part of a speculative
7030 * read-ahead initiated by the kernel
7031 * itself. No one is expecting this
7032 * page and no one will clean up its
7033 * error state if it ever becomes valid
7034 * in the future.
7035 * We have to free it here.
7036 */
7037 must_free = TRUE;
7038 }
7039
7040 /*
7041 * ENCRYPTED SWAP:
7042 * If the page was already encrypted,
7043 * we don't really need to decrypt it
7044 * now. It will get decrypted later,
7045 * on demand, as soon as someone needs
7046 * to access its contents.
7047 */
7048
7049 m->cleaning = FALSE;
7050 m->encrypted_cleaning = FALSE;
7051
7052 if (m->overwriting && !m->busy) {
7053 /*
7054 * this shouldn't happen since
7055 * this is an 'absent' page, but
7056 * it doesn't hurt to check for
7057 * the 'alternate' method of
7058 * stabilizing the page...
7059 * we will mark 'busy' to be cleared
7060 * in the following code which will
7061 * take care of the primary stabilization
7062 * method (i.e. setting 'busy' to TRUE)
7063 */
7064 dwp->dw_mask |= DW_vm_page_unwire;
7065 }
7066 m->overwriting = FALSE;
7067
7068 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7069
7070 if (must_free == TRUE)
7071 dwp->dw_mask |= DW_vm_page_free;
7072 else
7073 dwp->dw_mask |= DW_vm_page_activate;
7074 } else {
7075 /*
7076 * Handle the trusted pager throttle.
7077 */
7078 if (m->laundry)
7079 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7080
7081 if (upl->flags & UPL_ACCESS_BLOCKED) {
7082 /*
7083 * We blocked access to the pages in this UPL.
7084 * Clear the "busy" bit and wake up any waiter
7085 * for this page.
7086 */
7087 dwp->dw_mask |= DW_clear_busy;
7088 }
7089 if (m->overwriting) {
7090 if (m->busy)
7091 dwp->dw_mask |= DW_clear_busy;
7092 else {
7093 /*
7094 * deal with the 'alternate' method
7095 * of stabilizing the page...
7096 * we will either free the page
7097 * or mark 'busy' to be cleared
7098 * in the following code which will
7099 * take care of the primary stabilization
7100 * method (i.e. setting 'busy' to TRUE)
7101 */
7102 dwp->dw_mask |= DW_vm_page_unwire;
7103 }
7104 m->overwriting = FALSE;
7105 }
7106 if (m->encrypted_cleaning == TRUE) {
7107 m->encrypted_cleaning = FALSE;
7108
7109 dwp->dw_mask |= DW_clear_busy;
7110 }
7111 m->pageout = FALSE;
7112 m->cleaning = FALSE;
7113 #if MACH_PAGEMAP
7114 vm_external_state_clr(m->object->existence_map, m->offset);
7115 #endif /* MACH_PAGEMAP */
7116 if (error & UPL_ABORT_DUMP_PAGES) {
7117 pmap_disconnect(m->phys_page);
7118
7119 dwp->dw_mask |= DW_vm_page_free;
7120 } else {
7121 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7122 if (error & UPL_ABORT_REFERENCE) {
7123 /*
7124 * we've been told to explicitly
7125 * reference this page... for
7126 * file I/O, this is done by
7127 * implementing an LRU on the inactive q
7128 */
7129 dwp->dw_mask |= DW_vm_page_lru;
7130
7131 } else if (!m->active && !m->inactive && !m->speculative)
7132 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7133 }
7134 dwp->dw_mask |= DW_PAGE_WAKEUP;
7135 }
7136 }
7137 }
7138 abort_next_page:
7139 target_offset += PAGE_SIZE_64;
7140 xfer_size -= PAGE_SIZE;
7141 entry++;
7142
7143 if (dwp->dw_mask) {
7144 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7145 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7146
7147 if (dw_count >= dw_limit) {
7148 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
7149
7150 dwp = &dw_array[0];
7151 dw_count = 0;
7152 }
7153 } else {
7154 if (dwp->dw_mask & DW_clear_busy)
7155 m->busy = FALSE;
7156
7157 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7158 PAGE_WAKEUP(m);
7159 }
7160 }
7161 }
7162 if (dw_count)
7163 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
7164
7165 occupied = 1;
7166
7167 if (upl->flags & UPL_DEVICE_MEMORY) {
7168 occupied = 0;
7169 } else if (upl->flags & UPL_LITE) {
7170 int pg_num;
7171 int i;
7172
7173 pg_num = upl->size/PAGE_SIZE;
7174 pg_num = (pg_num + 31) >> 5;
7175 occupied = 0;
7176
7177 for (i = 0; i < pg_num; i++) {
7178 if (lite_list[i] != 0) {
7179 occupied = 1;
7180 break;
7181 }
7182 }
7183 } else {
7184 if (queue_empty(&upl->map_object->memq))
7185 occupied = 0;
7186 }
7187 if (occupied == 0) {
7188 /*
7189 * If this UPL element belongs to a Vector UPL and is
7190 * empty, then this is the right function to deallocate
7191 * it. So go ahead and set the *empty variable. The flag
7192 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7193 * should be considered relevant for the Vector UPL and
7194 * not the internal UPLs.
7195 */
7196 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7197 *empty = TRUE;
7198
7199 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7200 /*
7201 * this is not a paging object
7202 * so we need to drop the paging reference
7203 * that was taken when we created the UPL
7204 * against this object
7205 */
7206 vm_object_activity_end(shadow_object);
7207 vm_object_collapse(shadow_object, 0, TRUE);
7208 } else {
7209 /*
7210 * we donated the paging reference to
7211 * the map object... vm_pageout_object_terminate
7212 * will drop this reference
7213 */
7214 }
7215 }
7216 vm_object_unlock(shadow_object);
7217 if (object != shadow_object)
7218 vm_object_unlock(object);
7219
7220 if(!isVectorUPL)
7221 upl_unlock(upl);
7222 else {
7223 /*
7224 * If we completed our operations on an UPL that is
7225 * part of a Vectored UPL and if empty is TRUE, then
7226 * we should go ahead and deallocate this UPL element.
7227 * Then we check if this was the last of the UPL elements
7228 * within that Vectored UPL. If so, set empty to TRUE
7229 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7230 * can go ahead and deallocate the Vector UPL too.
7231 */
7232 if(*empty == TRUE) {
7233 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7234 upl_deallocate(upl);
7235 }
7236 goto process_upl_to_abort;
7237 }
7238
7239 return KERN_SUCCESS;
7240 }
7241
7242
7243 kern_return_t
7244 upl_abort(
7245 upl_t upl,
7246 int error)
7247 {
7248 boolean_t empty;
7249
7250 return upl_abort_range(upl, 0, upl->size, error, &empty);
7251 }
7252
7253
7254 /* an option on commit should be wire */
7255 kern_return_t
7256 upl_commit(
7257 upl_t upl,
7258 upl_page_info_t *page_list,
7259 mach_msg_type_number_t count)
7260 {
7261 boolean_t empty;
7262
7263 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7264 }
7265
7266
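/*
 * Routine:	iopl_valid_data
 * Purpose:
 *	Mark the absent, busy pages backing an IO-wire UPL as now
 *	containing valid data: clear the absent bit, dirty and wire each
 *	such page, wake any waiters, and adjust the object and global
 *	wired-page counts.
 */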
7267 void
7268 iopl_valid_data(
7269 upl_t upl)
7270 {
7271 vm_object_t object;
7272 vm_offset_t offset;
7273 vm_page_t m, nxt_page = VM_PAGE_NULL;
7274 upl_size_t size;
7275 int wired_count = 0;
7276
7277 if (upl == NULL)
7278 panic("iopl_valid_data: NULL upl");
7279 if (vector_upl_is_valid(upl))
7280 panic("iopl_valid_data: vector upl");
7281 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7282 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7283
7284 object = upl->map_object;
7285
7286 if (object == kernel_object || object == compressor_object)
7287 panic("iopl_valid_data: object == kernel or compressor");
7288
7289 if (object->purgable == VM_PURGABLE_VOLATILE)
7290 panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE");
7291
7292 size = upl->size;
7293
7294 vm_object_lock(object);
7295
7296 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7297 nxt_page = (vm_page_t)queue_first(&object->memq);
7298 else
7299 offset = 0 + upl->offset - object->paging_offset;
7300
7301 while (size) {
7302
7303 if (nxt_page != VM_PAGE_NULL) {
7304 m = nxt_page;
7305 nxt_page = (vm_page_t)queue_next(&nxt_page->listq);
7306 } else {
7307 m = vm_page_lookup(object, offset);
7308 offset += PAGE_SIZE;
7309
7310 if (m == VM_PAGE_NULL)
7311 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7312 }
7313 if (m->busy) {
7314 if (!m->absent)
7315 panic("iopl_valid_data: busy page w/o absent");
7316
7317 if (m->pageq.next || m->pageq.prev)
7318 panic("iopl_valid_data: busy+absent page on page queue");
7319
7320 m->absent = FALSE;
7321 m->dirty = TRUE;
7322 m->wire_count++;
7323 wired_count++;
7324
7325 PAGE_WAKEUP_DONE(m);
7326 }
7327 size -= PAGE_SIZE;
7328 }
7329 if (wired_count) {
7330 object->wired_page_count += wired_count;
7331
7332 vm_page_lockspin_queues();
7333 vm_page_wire_count += wired_count;
7334 vm_page_unlock_queues();
7335 }
7336 vm_object_unlock(object);
7337 }
7338
7339
7340
7341
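/*
 * Push the object's WIMG cache attributes down to the pmap layer for
 * the pages named in user_page_list as a single batched operation,
 * unless the object just uses the default cacheability.
 */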
7342 void
7343 vm_object_set_pmap_cache_attr(
7344 vm_object_t object,
7345 upl_page_info_array_t user_page_list,
7346 unsigned int num_pages,
7347 boolean_t batch_pmap_op)
7348 {
7349 unsigned int cache_attr = 0;
7350
7351 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7352 assert(user_page_list);
7353 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7354 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7355 }
7356 }
7357
7358 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
7359
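/*
 * Routine:	vm_object_iopl_request
 * Purpose:
 *	Roughly: create an IO-wire UPL directly against a VM object.
 *	Validate the control flags, honor copy-on-write obligations when
 *	the caller may write the pages, and wire the requested range so
 *	the pages stay resident for the duration of the I/O.
 */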
7360 kern_return_t
7361 vm_object_iopl_request(
7362 vm_object_t object,
7363 vm_object_offset_t offset,
7364 upl_size_t size,
7365 upl_t *upl_ptr,
7366 upl_page_info_array_t user_page_list,
7367 unsigned int *page_list_count,
7368 int cntrl_flags)
7369 {
7370 vm_page_t dst_page;
7371 vm_object_offset_t dst_offset;
7372 upl_size_t xfer_size;
7373 upl_t upl = NULL;
7374 unsigned int entry;
7375 wpl_array_t lite_list = NULL;
7376 int no_zero_fill = FALSE;
7377 unsigned int size_in_pages;
7378 u_int32_t psize;
7379 kern_return_t ret;
7380 vm_prot_t prot;
7381 struct vm_object_fault_info fault_info;
7382 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7383 struct vm_page_delayed_work *dwp;
7384 int dw_count;
7385 int dw_limit;
7386 int dw_index;
7387 boolean_t caller_lookup;
7388 int io_tracking_flag = 0;
7389 int interruptible;
7390
7391 boolean_t set_cache_attr_needed = FALSE;
7392 boolean_t free_wired_pages = FALSE;
7393 int fast_path_possible = 0;
7394
7395
7396 if (cntrl_flags & ~UPL_VALID_FLAGS) {
7397 /*
7398 * For forward compatibility's sake,
7399 * reject any unknown flag.
7400 */
7401 return KERN_INVALID_VALUE;
7402 }
7403 if (vm_lopage_needed == FALSE)
7404 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
7405
7406 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
7407 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
7408 return KERN_INVALID_VALUE;
7409
7410 if (object->phys_contiguous) {
7411 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
7412 return KERN_INVALID_ADDRESS;
7413
7414 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
7415 return KERN_INVALID_ADDRESS;
7416 }
7417 }
7418
7419 if (cntrl_flags & UPL_ENCRYPT) {
7420 /*
7421 * ENCRYPTED SWAP:
7422 * The paging path doesn't use this interface,
7423 * so we don't support the UPL_ENCRYPT flag
7424 * here. We won't encrypt the pages.
7425 */
7426 assert(! (cntrl_flags & UPL_ENCRYPT));
7427 }
7428 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7429 no_zero_fill = TRUE;
7430
7431 if (cntrl_flags & UPL_COPYOUT_FROM)
7432 prot = VM_PROT_READ;
7433 else
7434 prot = VM_PROT_READ | VM_PROT_WRITE;
7435
7436 if ((!object->internal) && (object->paging_offset != 0))
7437 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
7438
7439 #if CONFIG_IOSCHED || UPL_DEBUG
7440 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
7441 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
7442 #endif
7443
7444 #if CONFIG_IOSCHED
7445 if (object->io_tracking) {
7446 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
7447 if (object != kernel_object)
7448 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
7449 }
7450 #endif
7451
7452 if (object->phys_contiguous)
7453 psize = PAGE_SIZE;
7454 else
7455 psize = size;
7456
7457 if (cntrl_flags & UPL_SET_INTERNAL) {
7458 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
7459
7460 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7461 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
7462 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
7463 if (size == 0) {
7464 user_page_list = NULL;
7465 lite_list = NULL;
7466 }
7467 } else {
7468 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
7469
7470 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
7471 if (size == 0) {
7472 lite_list = NULL;
7473 }
7474 }
7475 if (user_page_list)
7476 user_page_list[0].device = FALSE;
7477 *upl_ptr = upl;
7478
7479 upl->map_object = object;
7480 upl->size = size;
7481
7482 size_in_pages = size / PAGE_SIZE;
7483
7484 if (object == kernel_object &&
7485 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
7486 upl->flags |= UPL_KERNEL_OBJECT;
7487 #if UPL_DEBUG
7488 vm_object_lock(object);
7489 #else
7490 vm_object_lock_shared(object);
7491 #endif
7492 } else {
7493 vm_object_lock(object);
7494 vm_object_activity_begin(object);
7495 }
7496 /*
7497 * paging in progress also protects the paging_offset
7498 */
7499 upl->offset = offset + object->paging_offset;
7500
7501 if (cntrl_flags & UPL_BLOCK_ACCESS) {
7502 /*
7503 * The user requested that access to the pages in this UPL
7504 * be blocked until the UPL is committed or aborted.
7505 */
7506 upl->flags |= UPL_ACCESS_BLOCKED;
7507 }
7508
7509 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
7510 object->purgable != VM_PURGABLE_VOLATILE &&
7511 object->purgable != VM_PURGABLE_EMPTY &&
7512 object->copy == NULL &&
7513 size == object->vo_size &&
7514 offset == 0 &&
7515 object->resident_page_count == 0 &&
7516 object->shadow == NULL &&
7517 object->pager == NULL)
7518 {
7519 fast_path_possible = 1;
7520 set_cache_attr_needed = TRUE;
7521 }
7522
7523 #if CONFIG_IOSCHED || UPL_DEBUG
7524 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
7525 vm_object_activity_begin(object);
7526 queue_enter(&object->uplq, upl, upl_t, uplq);
7527 }
7528 #endif
7529
7530 if (object->phys_contiguous) {
7531
7532 if (upl->flags & UPL_ACCESS_BLOCKED) {
7533 assert(!object->blocked_access);
7534 object->blocked_access = TRUE;
7535 }
7536
7537 vm_object_unlock(object);
7538
7539 /*
7540 * don't need any shadow mappings for this one
7541 * since it is already I/O memory
7542 */
7543 upl->flags |= UPL_DEVICE_MEMORY;
7544
7545 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
7546
7547 if (user_page_list) {
7548 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
7549 user_page_list[0].device = TRUE;
7550 }
7551 if (page_list_count != NULL) {
7552 if (upl->flags & UPL_INTERNAL)
7553 *page_list_count = 0;
7554 else
7555 *page_list_count = 1;
7556 }
7557 return KERN_SUCCESS;
7558 }
7559 if (object != kernel_object && object != compressor_object) {
7560 /*
7561 * Protect user space from future COW operations
7562 */
7563 #if VM_OBJECT_TRACKING_OP_TRUESHARE
7564 if (!object->true_share &&
7565 vm_object_tracking_inited) {
7566 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
7567 int num = 0;
7568
7569 num = OSBacktrace(bt,
7570 VM_OBJECT_TRACKING_BTDEPTH);
7571 btlog_add_entry(vm_object_tracking_btlog,
7572 object,
7573 VM_OBJECT_TRACKING_OP_TRUESHARE,
7574 bt,
7575 num);
7576 }
7577 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
7578
7579 object->true_share = TRUE;
7580
7581 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
7582 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7583 }
7584
7585 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
7586 object->copy != VM_OBJECT_NULL) {
7587 /*
7588 * Honor copy-on-write obligations
7589 *
7590 * The caller is gathering these pages and
7591 * might modify their contents. We need to
7592 * make sure that the copy object has its own
7593 * private copies of these pages before we let
7594 * the caller modify them.
7595 *
7596 * NOTE: someone else could map the original object
7597 * after we've done this copy-on-write here, and they
7598 * could then see an inconsistent picture of the memory
7599 * while it's being modified via the UPL. To prevent this,
7600 * we would have to block access to these pages until the
7601 * UPL is released. We could use the UPL_BLOCK_ACCESS
7602 * code path for that...
7603 */
7604 vm_object_update(object,
7605 offset,
7606 size,
7607 NULL,
7608 NULL,
7609 FALSE, /* should_return */
7610 MEMORY_OBJECT_COPY_SYNC,
7611 VM_PROT_NO_CHANGE);
7612 #if DEVELOPMENT || DEBUG
7613 iopl_cow++;
7614 iopl_cow_pages += size >> PAGE_SHIFT;
7615 #endif
7616 }
7617 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7618 interruptible = THREAD_ABORTSAFE;
7619 else
7620 interruptible = THREAD_UNINT;
7621
7622 entry = 0;
7623
7624 xfer_size = size;
7625 dst_offset = offset;
7626 dw_count = 0;
7627
7628 if (fast_path_possible) {
7629 int wired_count = 0;
7630
7631 while (xfer_size) {
7632
7633 while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) {
7634 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
7635
7636 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7637
7638 if (vm_page_wait(interruptible) == FALSE) {
7639 /*
7640 * interrupted case
7641 */
7642 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
7643
7644 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7645
7646 if (wired_count) {
7647 vm_page_lockspin_queues();
7648 vm_page_wire_count += wired_count;
7649 vm_page_unlock_queues();
7650
7651 free_wired_pages = TRUE;
7652 }
7653 ret = MACH_SEND_INTERRUPTED;
7654
7655 goto return_err;
7656 }
7657 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
7658
7659 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7660 }
7661 if (no_zero_fill == FALSE)
7662 vm_page_zero_fill(dst_page);
7663 else
7664 dst_page->absent = TRUE;
7665
7666 dst_page->reference = TRUE;
7667
7668 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7669 SET_PAGE_DIRTY(dst_page, FALSE);
7670 }
7671 if (dst_page->absent == FALSE) {
7672 assert(object->purgable != VM_PURGABLE_VOLATILE);
7673 assert(object->purgable != VM_PURGABLE_EMPTY);
7674 dst_page->wire_count++;
7675 wired_count++;
7676
7677 PAGE_WAKEUP_DONE(dst_page);
7678 }
7679 vm_page_insert_internal(dst_page, object, dst_offset, FALSE, TRUE, TRUE);
7680
7681 lite_list[entry>>5] |= 1 << (entry & 31);
7682
7683 if (dst_page->phys_page > upl->highest_page)
7684 upl->highest_page = dst_page->phys_page;
7685
7686 if (user_page_list) {
7687 user_page_list[entry].phys_addr = dst_page->phys_page;
7688 user_page_list[entry].absent = dst_page->absent;
7689 user_page_list[entry].dirty = dst_page->dirty;
7690 user_page_list[entry].precious = FALSE;
7691 user_page_list[entry].pageout = FALSE;
7692 user_page_list[entry].device = FALSE;
7693 user_page_list[entry].needed = FALSE;
7694 user_page_list[entry].speculative = FALSE;
7695 user_page_list[entry].cs_validated = FALSE;
7696 user_page_list[entry].cs_tainted = FALSE;
7697 }
7698 entry++;
7699 dst_offset += PAGE_SIZE_64;
7700 xfer_size -= PAGE_SIZE;
7701 size_in_pages--;
7702 }
7703 if (wired_count) {
7704 vm_page_lockspin_queues();
7705 vm_page_wire_count += wired_count;
7706 vm_page_unlock_queues();
7707 }
7708 goto finish;
7709 }
7710
7711 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
7712 fault_info.user_tag = 0;
7713 fault_info.lo_offset = offset;
7714 fault_info.hi_offset = offset + xfer_size;
7715 fault_info.no_cache = FALSE;
7716 fault_info.stealth = FALSE;
7717 fault_info.io_sync = FALSE;
7718 fault_info.cs_bypass = FALSE;
7719 fault_info.mark_zf_absent = TRUE;
7720 fault_info.interruptible = interruptible;
7721 fault_info.batch_pmap_op = TRUE;
7722
7723 dwp = &dw_array[0];
7724 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7725
7726 while (xfer_size) {
7727 vm_fault_return_t result;
7728 unsigned int pg_num;
7729
7730 dwp->dw_mask = 0;
7731
7732 dst_page = vm_page_lookup(object, dst_offset);
7733
7734 /*
7735 * ENCRYPTED SWAP:
7736 * If the page is encrypted, we need to decrypt it,
7737 * so force a soft page fault.
7738 */
7739 if (dst_page == VM_PAGE_NULL ||
7740 dst_page->busy ||
7741 dst_page->encrypted ||
7742 dst_page->error ||
7743 dst_page->restart ||
7744 dst_page->absent ||
7745 dst_page->fictitious) {
7746
7747 if (object == kernel_object)
7748 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
7749 if (object == compressor_object)
7750 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
7751
7752 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
7753 ret = KERN_MEMORY_ERROR;
7754 goto return_err;
7755 }
7756 set_cache_attr_needed = TRUE;
7757
7758 /*
7759 * We just looked up the page and the result remains valid
7760 * until the object lock is released, so send it to
7761 * vm_fault_page() (as "dst_page"), to avoid having to
7762 * look it up again there.
7763 */
7764 caller_lookup = TRUE;
7765
7766 do {
7767 vm_page_t top_page;
7768 kern_return_t error_code;
7769
7770 fault_info.cluster_size = xfer_size;
7771
7772 vm_object_paging_begin(object);
7773
7774 result = vm_fault_page(object, dst_offset,
7775 prot | VM_PROT_WRITE, FALSE,
7776 caller_lookup,
7777 &prot, &dst_page, &top_page,
7778 (int *)0,
7779 &error_code, no_zero_fill,
7780 FALSE, &fault_info);
7781
7782 /* our lookup is no longer valid at this point */
7783 caller_lookup = FALSE;
7784
7785 switch (result) {
7786
7787 case VM_FAULT_SUCCESS:
7788
7789 if ( !dst_page->absent) {
7790 PAGE_WAKEUP_DONE(dst_page);
7791 } else {
7792 /*
7793 * we only get back an absent page if we
7794 * requested that it not be zero-filled
7795 * because we are about to fill it via I/O
7796 *
7797 * absent pages should be left BUSY
7798 * to prevent them from being faulted
7799 * into an address space before we've
7800 * had a chance to complete the I/O on
7801 * them since they may contain info that
7802 * shouldn't be seen by the faulting task
7803 */
7804 }
7805 /*
7806 * Release paging references and
7807 * top-level placeholder page, if any.
7808 */
7809 if (top_page != VM_PAGE_NULL) {
7810 vm_object_t local_object;
7811
7812 local_object = top_page->object;
7813
7814 if (top_page->object != dst_page->object) {
7815 vm_object_lock(local_object);
7816 VM_PAGE_FREE(top_page);
7817 vm_object_paging_end(local_object);
7818 vm_object_unlock(local_object);
7819 } else {
7820 VM_PAGE_FREE(top_page);
7821 vm_object_paging_end(local_object);
7822 }
7823 }
7824 vm_object_paging_end(object);
7825 break;
7826
7827 case VM_FAULT_RETRY:
7828 vm_object_lock(object);
7829 break;
7830
7831 case VM_FAULT_MEMORY_SHORTAGE:
7832 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
7833
7834 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7835
7836 if (vm_page_wait(interruptible)) {
7837 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
7838
7839 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7840 vm_object_lock(object);
7841
7842 break;
7843 }
7844 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
7845
7846 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7847
7848 /* fall thru */
7849
7850 case VM_FAULT_INTERRUPTED:
7851 error_code = MACH_SEND_INTERRUPTED;
7852 case VM_FAULT_MEMORY_ERROR:
7853 memory_error:
7854 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
7855
7856 vm_object_lock(object);
7857 goto return_err;
7858
7859 case VM_FAULT_SUCCESS_NO_VM_PAGE:
7860 /* success but no page: fail */
7861 vm_object_paging_end(object);
7862 vm_object_unlock(object);
7863 goto memory_error;
7864
7865 default:
7866 panic("vm_object_iopl_request: unexpected error"
7867 " 0x%x from vm_fault_page()\n", result);
7868 }
7869 } while (result != VM_FAULT_SUCCESS);
7870
7871 }
7872 if (upl->flags & UPL_KERNEL_OBJECT)
7873 goto record_phys_addr;
7874
7875 if (dst_page->compressor) {
7876 dst_page->busy = TRUE;
7877 goto record_phys_addr;
7878 }
7879
7880 if (dst_page->cleaning) {
7881 /*
7882 * Someone else is cleaning this page in place.
7883 * In theory, we should be able to proceed and use this
7884 * page but they'll probably end up clearing the "busy"
7885 * bit on it in upl_commit_range() but they didn't set
7886 * it, so they would clear our "busy" bit and open
7887 * us to race conditions.
7888 * We'd better wait for the cleaning to complete and
7889 * then try again.
7890 */
7891 vm_object_iopl_request_sleep_for_cleaning++;
7892 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
7893 continue;
7894 }
7895 if (dst_page->laundry) {
7896 dst_page->pageout = FALSE;
7897
7898 vm_pageout_steal_laundry(dst_page, FALSE);
7899 }
7900 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
7901 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
7902 vm_page_t low_page;
7903 int refmod;
7904
7905 /*
7906 * support devices that can't DMA above 32 bits
7907 * by substituting pages from a pool of low-address
7908 * memory for any pages we find above the 4G mark.
7909 * We can't substitute if the page is already wired, because
7910 * we don't know whether that physical address has been
7911 * handed out to some other 64-bit-capable DMA device to use.
7912 */
7913 if (VM_PAGE_WIRED(dst_page)) {
7914 ret = KERN_PROTECTION_FAILURE;
7915 goto return_err;
7916 }
7917 low_page = vm_page_grablo();
7918
7919 if (low_page == VM_PAGE_NULL) {
7920 ret = KERN_RESOURCE_SHORTAGE;
7921 goto return_err;
7922 }
7923 /*
7924 * from here until the vm_page_replace completes
7925 * we mustn't drop the object lock... we don't
7926 * want anyone refaulting this page in and using
7927 * it after we disconnect it... we want the fault
7928 * to find the new page being substituted.
7929 */
7930 if (dst_page->pmapped)
7931 refmod = pmap_disconnect(dst_page->phys_page);
7932 else
7933 refmod = 0;
7934
7935 if (!dst_page->absent)
7936 vm_page_copy(dst_page, low_page);
7937
7938 low_page->reference = dst_page->reference;
7939 low_page->dirty = dst_page->dirty;
7940 low_page->absent = dst_page->absent;
7941
7942 if (refmod & VM_MEM_REFERENCED)
7943 low_page->reference = TRUE;
7944 if (refmod & VM_MEM_MODIFIED) {
7945 SET_PAGE_DIRTY(low_page, FALSE);
7946 }
7947
7948 vm_page_replace(low_page, object, dst_offset);
7949
7950 dst_page = low_page;
7951 /*
7952 * vm_page_grablo returned the page marked
7953 * BUSY... we don't need a PAGE_WAKEUP_DONE
7954 * here, because we've never dropped the object lock
7955 */
7956 if ( !dst_page->absent)
7957 dst_page->busy = FALSE;
7958 }
7959 if ( !dst_page->busy)
7960 dwp->dw_mask |= DW_vm_page_wire;
7961
7962 if (cntrl_flags & UPL_BLOCK_ACCESS) {
7963 /*
7964 * Mark the page "busy" to block any future page fault
7965 * on this page in addition to wiring it.
7966 * We'll also remove the mapping
7967 * of all these pages before leaving this routine.
7968 */
7969 assert(!dst_page->fictitious);
7970 dst_page->busy = TRUE;
7971 }
7972 /*
7973 * expect the page to be used
7974 * page queues lock must be held to set 'reference'
7975 */
7976 dwp->dw_mask |= DW_set_reference;
7977
7978 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7979 SET_PAGE_DIRTY(dst_page, TRUE);
7980 }
7981 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7982 pmap_sync_page_attributes_phys(dst_page->phys_page);
7983 dst_page->written_by_kernel = FALSE;
7984 }
7985
7986 record_phys_addr:
7987 if (dst_page->busy)
7988 upl->flags |= UPL_HAS_BUSY;
7989
7990 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
7991 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
7992 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
7993
7994 if (dst_page->phys_page > upl->highest_page)
7995 upl->highest_page = dst_page->phys_page;
7996
7997 if (user_page_list) {
7998 user_page_list[entry].phys_addr = dst_page->phys_page;
7999 user_page_list[entry].pageout = dst_page->pageout;
8000 user_page_list[entry].absent = dst_page->absent;
8001 user_page_list[entry].dirty = dst_page->dirty;
8002 user_page_list[entry].precious = dst_page->precious;
8003 user_page_list[entry].device = FALSE;
8004 user_page_list[entry].needed = FALSE;
8005 if (dst_page->clustered == TRUE)
8006 user_page_list[entry].speculative = dst_page->speculative;
8007 else
8008 user_page_list[entry].speculative = FALSE;
8009 user_page_list[entry].cs_validated = dst_page->cs_validated;
8010 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
8011 }
8012 if (object != kernel_object && object != compressor_object) {
8013 /*
8014 * someone is explicitly grabbing this page...
8015 * update clustered and speculative state
8016 *
8017 */
8018 if (dst_page->clustered)
8019 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8020 }
8021 entry++;
8022 dst_offset += PAGE_SIZE_64;
8023 xfer_size -= PAGE_SIZE;
8024 size_in_pages--;
8025
8026 if (dwp->dw_mask) {
8027 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8028
8029 if (dw_count >= dw_limit) {
8030 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
8031
8032 dwp = &dw_array[0];
8033 dw_count = 0;
8034 }
8035 }
8036 }
8037 if (dw_count)
8038 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
8039
8040 finish:
8041 if (user_page_list && set_cache_attr_needed == TRUE)
8042 vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE);
8043
8044 if (page_list_count != NULL) {
8045 if (upl->flags & UPL_INTERNAL)
8046 *page_list_count = 0;
8047 else if (*page_list_count > entry)
8048 *page_list_count = entry;
8049 }
8050 vm_object_unlock(object);
8051
8052 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8053 /*
8054 * We've marked all the pages "busy" so that future
8055 * page faults will block.
8056 * Now remove the mapping for these pages, so that they
8057 * can't be accessed without causing a page fault.
8058 */
8059 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8060 PMAP_NULL, 0, VM_PROT_NONE);
8061 assert(!object->blocked_access);
8062 object->blocked_access = TRUE;
8063 }
8064 return KERN_SUCCESS;
8065
8066 return_err:
8067 dw_index = 0;
8068
8069 for (; offset < dst_offset; offset += PAGE_SIZE) {
8070 boolean_t need_unwire;
8071
8072 dst_page = vm_page_lookup(object, offset);
8073
8074 if (dst_page == VM_PAGE_NULL)
8075 panic("vm_object_iopl_request: Wired page missing. \n");
8076
8077 /*
8078 * if we've already processed this page in an earlier
8079 * dw_do_work, we need to undo the wiring... we will
8080 * leave the dirty and reference bits on if they
8081 * were set, since we don't have a good way of knowing
8082 * what the previous state was and we won't get here
8083 * under any normal circumstances... we will always
8084 * clear BUSY and wakeup any waiters via vm_page_free
8085 * or PAGE_WAKEUP_DONE
8086 */
8087 need_unwire = TRUE;
8088
8089 if (dw_count) {
8090 if (dw_array[dw_index].dw_m == dst_page) {
8091 /*
8092 * still in the deferred work list
8093 * which means we haven't yet called
8094 * vm_page_wire on this page
8095 */
8096 need_unwire = FALSE;
8097
8098 dw_index++;
8099 dw_count--;
8100 }
8101 }
8102 vm_page_lock_queues();
8103
8104 if (dst_page->absent || free_wired_pages == TRUE) {
8105 vm_page_free(dst_page);
8106
8107 need_unwire = FALSE;
8108 } else {
8109 if (need_unwire == TRUE)
8110 vm_page_unwire(dst_page, TRUE);
8111
8112 PAGE_WAKEUP_DONE(dst_page);
8113 }
8114 vm_page_unlock_queues();
8115
8116 if (need_unwire == TRUE)
8117 VM_STAT_INCR(reactivations);
8118 }
8119 #if UPL_DEBUG
8120 upl->upl_state = 2;
8121 #endif
8122 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8123 vm_object_activity_end(object);
8124 vm_object_collapse(object, 0, TRUE);
8125 }
8126 vm_object_unlock(object);
8127 upl_destroy(upl);
8128
8129 return ret;
8130 }
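/*
 * A minimal, self-contained sketch of the "lite list" bitmap encoding that
 * vm_object_iopl_request() above uses to record which pages of the request
 * were populated: page N maps to bit (N & 31) of 32-bit word (N >> 5).
 * lite_bitmap_set/lite_bitmap_test are hypothetical helper names used only
 * for illustration; they are not kernel interfaces.
 */
#if 0 /* illustrative sketch only -- never compiled */
static void
lite_bitmap_set(uint32_t *lite_list, unsigned int pg_num)
{
	/* same encoding as lite_list[pg_num>>5] |= 1 << (pg_num & 31) above */
	lite_list[pg_num >> 5] |= 1U << (pg_num & 31);
}

static int
lite_bitmap_test(const uint32_t *lite_list, unsigned int pg_num)
{
	return (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) != 0;
}
#endif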
8131
8132 kern_return_t
8133 upl_transpose(
8134 upl_t upl1,
8135 upl_t upl2)
8136 {
8137 kern_return_t retval;
8138 boolean_t upls_locked;
8139 vm_object_t object1, object2;
8140
8141 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8142 return KERN_INVALID_ARGUMENT;
8143 }
8144
8145 upls_locked = FALSE;
8146
8147 /*
8148 * Since we need to lock both UPLs at the same time,
8149 * avoid deadlocks by always taking locks in the same order.
8150 */
8151 if (upl1 < upl2) {
8152 upl_lock(upl1);
8153 upl_lock(upl2);
8154 } else {
8155 upl_lock(upl2);
8156 upl_lock(upl1);
8157 }
8158 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8159
8160 object1 = upl1->map_object;
8161 object2 = upl2->map_object;
8162
8163 if (upl1->offset != 0 || upl2->offset != 0 ||
8164 upl1->size != upl2->size) {
8165 /*
8166 * We deal only with full objects, not subsets.
8167 * That's because we exchange the entire backing store info
8168 * for the objects: pager, resident pages, etc... We can't do
8169 * only part of it.
8170 */
8171 retval = KERN_INVALID_VALUE;
8172 goto done;
8173 }
8174
8175 /*
8176 * Transpose the VM objects' backing store.
8177 */
8178 retval = vm_object_transpose(object1, object2,
8179 (vm_object_size_t) upl1->size);
8180
8181 if (retval == KERN_SUCCESS) {
8182 /*
8183 * Make each UPL point to the correct VM object, i.e. the
8184 * object holding the pages that the UPL refers to...
8185 */
8186 #if CONFIG_IOSCHED || UPL_DEBUG
8187 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8188 vm_object_lock(object1);
8189 vm_object_lock(object2);
8190 }
8191 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8192 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8193 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8194 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8195 #endif
8196 upl1->map_object = object2;
8197 upl2->map_object = object1;
8198
8199 #if CONFIG_IOSCHED || UPL_DEBUG
8200 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8201 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8202 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8203 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8204 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8205 vm_object_unlock(object2);
8206 vm_object_unlock(object1);
8207 }
8208 #endif
8209 }
8210
8211 done:
8212 /*
8213 * Cleanup.
8214 */
8215 if (upls_locked) {
8216 upl_unlock(upl1);
8217 upl_unlock(upl2);
8218 upls_locked = FALSE;
8219 }
8220
8221 return retval;
8222 }
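/*
 * A minimal sketch of the deadlock-avoidance rule used by upl_transpose()
 * above: when two locks of the same kind must be held at once, always take
 * them in a fixed global order (here, by ascending pointer value, mirroring
 * the "upl1 < upl2" test).  The struct and the use of lck_mtx_lock() below
 * are illustrative stand-ins, not the UPL's own upl_lock()/upl_unlock().
 */
#if 0 /* illustrative sketch only -- never compiled */
struct locked_obj {
	lck_mtx_t	lock;		/* hypothetical per-object mutex */
};

static void
lock_pair_in_order(struct locked_obj *a, struct locked_obj *b)
{
	/*
	 * Take the lower-addressed lock first, so any two threads locking
	 * the same pair agree on the order and cannot deadlock.
	 */
	if (a < b) {
		lck_mtx_lock(&a->lock);
		lck_mtx_lock(&b->lock);
	} else {
		lck_mtx_lock(&b->lock);
		lck_mtx_lock(&a->lock);
	}
}
#endif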
8223
8224 void
8225 upl_range_needed(
8226 upl_t upl,
8227 int index,
8228 int count)
8229 {
8230 upl_page_info_t *user_page_list;
8231 int size_in_pages;
8232
8233 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8234 return;
8235
8236 size_in_pages = upl->size / PAGE_SIZE;
8237
8238 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8239
8240 while (count-- && index < size_in_pages)
8241 user_page_list[index++].needed = TRUE;
8242 }
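/*
 * A minimal sketch of the layout assumption behind the pointer arithmetic
 * used by upl_range_needed() above and by vm_object_iopl_request(): for an
 * internal UPL the upl_page_info_t array is allocated immediately after the
 * struct upl, and for a lite UPL the wired-page bitmap follows that array.
 * internal_page_list/internal_lite_list are hypothetical helper names.
 */
#if 0 /* illustrative sketch only -- never compiled */
static upl_page_info_t *
internal_page_list(upl_t upl)
{
	/* the page list lives directly behind the upl header */
	return (upl_page_info_t *)(((uintptr_t)upl) + sizeof(struct upl));
}

static wpl_array_t
internal_lite_list(upl_t upl, upl_size_t size)
{
	/* the lite bitmap follows the page list, one entry per page */
	return (wpl_array_t)(((uintptr_t)internal_page_list(upl)) +
	    ((size / PAGE_SIZE) * sizeof(upl_page_info_t)));
}
#endif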
8243
8244
8245 /*
8246 * ENCRYPTED SWAP:
8247 *
8248 * Rationale: the user might have some encrypted data on disk (via
8249 * FileVault or any other mechanism). That data is then decrypted in
8250 * memory, which is safe as long as the machine is secure. But that
8251 * decrypted data in memory could be paged out to disk by the default
8252 * pager. The data would then be stored on disk in the clear (not encrypted)
8253 * and it could be accessed by anyone who gets physical access to the
8254 * disk (if the laptop or the disk gets stolen for example). This weakens
8255 * the security offered by FileVault.
8256 *
8257 * Solution: the default pager will optionally request that all the
8258 * pages it gathers for pageout be encrypted, via the UPL interfaces,
8259 * before it sends this UPL to disk via the vnode_pageout() path.
8260 *
8261 * Notes:
8262 *
8263 * To avoid disrupting the VM LRU algorithms, we want to keep the
8264 * clean-in-place mechanisms, which allow us to send some extra pages to
8265 * swap (clustering) without actually removing them from the user's
8266 * address space. We don't want the user to unknowingly access encrypted
8267 * data, so we have to actually remove the encrypted pages from the page
8268 * table. When the user accesses the data, the hardware will fail to
8269 * locate the virtual page in its page table and will trigger a page
8270 * fault. We can then decrypt the page and enter it in the page table
8271 * again. Whenever we allow the user to access the contents of a page,
8272 * we have to make sure it's not encrypted.
8273 *
8274 *
8275 */
8276 /*
8277 * ENCRYPTED SWAP:
8278 * Reserve of virtual addresses in the kernel address space.
8279 * We need to map the physical pages in the kernel, so that we
8280 * can call the encryption/decryption routines with a kernel
8281 * virtual address. We keep this pool of pre-allocated kernel
8282 * virtual addresses so that we don't have to scan the kernel's
8283 * virtual address space each time we need to encrypt or decrypt
8284 * a physical page.
8285 * It would be nice to be able to encrypt and decrypt in physical
8286 * mode but that might not always be more efficient...
8287 */
8288 decl_simple_lock_data(,vm_paging_lock)
8289 #define VM_PAGING_NUM_PAGES 64
8290 vm_map_offset_t vm_paging_base_address = 0;
8291 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8292 int vm_paging_max_index = 0;
8293 int vm_paging_page_waiter = 0;
8294 int vm_paging_page_waiter_total = 0;
8295 unsigned long vm_paging_no_kernel_page = 0;
8296 unsigned long vm_paging_objects_mapped = 0;
8297 unsigned long vm_paging_pages_mapped = 0;
8298 unsigned long vm_paging_objects_mapped_slow = 0;
8299 unsigned long vm_paging_pages_mapped_slow = 0;
8300
8301 void
8302 vm_paging_map_init(void)
8303 {
8304 kern_return_t kr;
8305 vm_map_offset_t page_map_offset;
8306 vm_map_entry_t map_entry;
8307
8308 assert(vm_paging_base_address == 0);
8309
8310 /*
8311 * Initialize our pool of pre-allocated kernel
8312 * virtual addresses.
8313 */
8314 page_map_offset = 0;
8315 kr = vm_map_find_space(kernel_map,
8316 &page_map_offset,
8317 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8318 0,
8319 0,
8320 &map_entry);
8321 if (kr != KERN_SUCCESS) {
8322 panic("vm_paging_map_init: kernel_map full\n");
8323 }
8324 map_entry->object.vm_object = kernel_object;
8325 map_entry->offset = page_map_offset;
8326 map_entry->protection = VM_PROT_NONE;
8327 map_entry->max_protection = VM_PROT_NONE;
8328 map_entry->permanent = TRUE;
8329 vm_object_reference(kernel_object);
8330 vm_map_unlock(kernel_map);
8331
8332 assert(vm_paging_base_address == 0);
8333 vm_paging_base_address = page_map_offset;
8334 }
8335
8336 /*
8337 * ENCRYPTED SWAP:
8338 * vm_paging_map_object:
8339 * Maps part of a VM object's pages in the kernel
8340 * virtual address space, using the pre-allocated
8341 * kernel virtual addresses, if possible.
8342 * Context:
8343 * The VM object is locked. This lock will get
8344 * dropped and re-acquired though, so the caller
8345 * must make sure the VM object is kept alive
8346 * (by holding a VM map that has a reference
8347 * on it, for example, or taking an extra reference).
8348 * The page should also be kept busy to prevent
8349 * it from being reclaimed.
8350 */
8351 kern_return_t
8352 vm_paging_map_object(
8353 vm_page_t page,
8354 vm_object_t object,
8355 vm_object_offset_t offset,
8356 vm_prot_t protection,
8357 boolean_t can_unlock_object,
8358 vm_map_size_t *size, /* IN/OUT */
8359 vm_map_offset_t *address, /* OUT */
8360 boolean_t *need_unmap) /* OUT */
8361 {
8362 kern_return_t kr;
8363 vm_map_offset_t page_map_offset;
8364 vm_map_size_t map_size;
8365 vm_object_offset_t object_offset;
8366 int i;
8367
8368 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8369 /* use permanent 1-to-1 kernel mapping of physical memory ? */
8370 #if __x86_64__
8371 *address = (vm_map_offset_t)
8372 PHYSMAP_PTOV((pmap_paddr_t)page->phys_page <<
8373 PAGE_SHIFT);
8374 *need_unmap = FALSE;
8375 return KERN_SUCCESS;
8376 #else
8377 #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8378 #endif
8379
8380 assert(page->busy);
8381 /*
8382 * Use one of the pre-allocated kernel virtual addresses
8383 * and just enter the VM page in the kernel address space
8384 * at that virtual address.
8385 */
8386 simple_lock(&vm_paging_lock);
8387
8388 /*
8389 * Try and find an available kernel virtual address
8390 * from our pre-allocated pool.
8391 */
8392 page_map_offset = 0;
8393 for (;;) {
8394 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
8395 if (vm_paging_page_inuse[i] == FALSE) {
8396 page_map_offset =
8397 vm_paging_base_address +
8398 (i * PAGE_SIZE);
8399 break;
8400 }
8401 }
8402 if (page_map_offset != 0) {
8403 /* found a space to map our page ! */
8404 break;
8405 }
8406
8407 if (can_unlock_object) {
8408 /*
8409 * If we can afford to unlock the VM object,
8410 * let's take the slow path now...
8411 */
8412 break;
8413 }
8414 /*
8415 * We can't afford to unlock the VM object, so
8416 * let's wait for a space to become available...
8417 */
8418 vm_paging_page_waiter_total++;
8419 vm_paging_page_waiter++;
8420 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8421 if (kr == THREAD_WAITING) {
8422 simple_unlock(&vm_paging_lock);
8423 kr = thread_block(THREAD_CONTINUE_NULL);
8424 simple_lock(&vm_paging_lock);
8425 }
8426 vm_paging_page_waiter--;
8427 /* ... and try again */
8428 }
8429
8430 if (page_map_offset != 0) {
8431 /*
8432 * We found a kernel virtual address;
8433 * map the physical page to that virtual address.
8434 */
8435 if (i > vm_paging_max_index) {
8436 vm_paging_max_index = i;
8437 }
8438 vm_paging_page_inuse[i] = TRUE;
8439 simple_unlock(&vm_paging_lock);
8440
8441 page->pmapped = TRUE;
8442
8443 /*
8444 * Keep the VM object locked over the PMAP_ENTER
8445 * and the actual use of the page by the kernel,
8446 * or this pmap mapping might get undone by a
8447 * vm_object_pmap_protect() call...
8448 */
8449 PMAP_ENTER(kernel_pmap,
8450 page_map_offset,
8451 page,
8452 protection,
8453 VM_PROT_NONE,
8454 0,
8455 TRUE);
8456 vm_paging_objects_mapped++;
8457 vm_paging_pages_mapped++;
8458 *address = page_map_offset;
8459 *need_unmap = TRUE;
8460
8461 /* all done and mapped, ready to use ! */
8462 return KERN_SUCCESS;
8463 }
8464
8465 /*
8466 * We ran out of pre-allocated kernel virtual
8467 * addresses. Just map the page in the kernel
8468 * the slow and regular way.
8469 */
8470 vm_paging_no_kernel_page++;
8471 simple_unlock(&vm_paging_lock);
8472 }
8473
8474 if (! can_unlock_object) {
8475 *address = 0;
8476 *size = 0;
8477 *need_unmap = FALSE;
8478 return KERN_NOT_SUPPORTED;
8479 }
8480
8481 object_offset = vm_object_trunc_page(offset);
8482 map_size = vm_map_round_page(*size,
8483 VM_MAP_PAGE_MASK(kernel_map));
8484
8485 /*
8486 * Try and map the required range of the object
8487 * in the kernel_map
8488 */
8489
8490 vm_object_reference_locked(object); /* for the map entry */
8491 vm_object_unlock(object);
8492
8493 kr = vm_map_enter(kernel_map,
8494 address,
8495 map_size,
8496 0,
8497 VM_FLAGS_ANYWHERE,
8498 object,
8499 object_offset,
8500 FALSE,
8501 protection,
8502 VM_PROT_ALL,
8503 VM_INHERIT_NONE);
8504 if (kr != KERN_SUCCESS) {
8505 *address = 0;
8506 *size = 0;
8507 *need_unmap = FALSE;
8508 vm_object_deallocate(object); /* for the map entry */
8509 vm_object_lock(object);
8510 return kr;
8511 }
8512
8513 *size = map_size;
8514
8515 /*
8516 * Enter the mapped pages in the page table now.
8517 */
8518 vm_object_lock(object);
8519 /*
8520 * VM object must be kept locked from before PMAP_ENTER()
8521 * until after the kernel is done accessing the page(s).
8522 * Otherwise, the pmap mappings in the kernel could be
8523 * undone by a call to vm_object_pmap_protect().
8524 */
8525
8526 for (page_map_offset = 0;
8527 map_size != 0;
8528 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
8529
8530 page = vm_page_lookup(object, offset + page_map_offset);
8531 if (page == VM_PAGE_NULL) {
8532 printf("vm_paging_map_object: no page !?");
8533 vm_object_unlock(object);
8534 kr = vm_map_remove(kernel_map, *address, *size,
8535 VM_MAP_NO_FLAGS);
8536 assert(kr == KERN_SUCCESS);
8537 *address = 0;
8538 *size = 0;
8539 *need_unmap = FALSE;
8540 vm_object_lock(object);
8541 return KERN_MEMORY_ERROR;
8542 }
8543 page->pmapped = TRUE;
8544
8545 //assert(pmap_verify_free(page->phys_page));
8546 PMAP_ENTER(kernel_pmap,
8547 *address + page_map_offset,
8548 page,
8549 protection,
8550 VM_PROT_NONE,
8551 0,
8552 TRUE);
8553 }
8554
8555 vm_paging_objects_mapped_slow++;
8556 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
8557
8558 *need_unmap = TRUE;
8559
8560 return KERN_SUCCESS;
8561 }
8562
8563 /*
8564 * ENCRYPTED SWAP:
8565 * vm_paging_unmap_object:
8566 * Unmaps part of a VM object's pages from the kernel
8567 * virtual address space.
8568 * Context:
8569 * The VM object is locked. This lock will get
8570 * dropped and re-acquired though.
8571 */
8572 void
8573 vm_paging_unmap_object(
8574 vm_object_t object,
8575 vm_map_offset_t start,
8576 vm_map_offset_t end)
8577 {
8578 kern_return_t kr;
8579 int i;
8580
8581 if ((vm_paging_base_address == 0) ||
8582 (start < vm_paging_base_address) ||
8583 (end > (vm_paging_base_address
8584 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
8585 /*
8586 * We didn't use our pre-allocated pool of
8587 * kernel virtual address. Deallocate the
8588 * virtual memory.
8589 */
8590 if (object != VM_OBJECT_NULL) {
8591 vm_object_unlock(object);
8592 }
8593 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
8594 if (object != VM_OBJECT_NULL) {
8595 vm_object_lock(object);
8596 }
8597 assert(kr == KERN_SUCCESS);
8598 } else {
8599 /*
8600 * We used a kernel virtual address from our
8601 * pre-allocated pool. Put it back in the pool
8602 * for next time.
8603 */
8604 assert(end - start == PAGE_SIZE);
8605 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
8606 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
8607
8608 /* undo the pmap mapping */
8609 pmap_remove(kernel_pmap, start, end);
8610
8611 simple_lock(&vm_paging_lock);
8612 vm_paging_page_inuse[i] = FALSE;
8613 if (vm_paging_page_waiter) {
8614 thread_wakeup(&vm_paging_page_waiter);
8615 }
8616 simple_unlock(&vm_paging_lock);
8617 }
8618 }
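/*
 * A minimal sketch of the fixed-size virtual-address pool protocol used by
 * vm_paging_map_object()/vm_paging_unmap_object() above: a small array of
 * "in use" flags guarded by a simple lock, plus a waiter count so a thread
 * that cannot fall back to the slow path can sleep until a slot is freed.
 * POOL_SLOTS, pool_alloc_slot, pool_free_slot and the pool globals are
 * hypothetical illustrations (the lock is assumed to be initialized
 * elsewhere); they are not kernel interfaces.
 */
#if 0 /* illustrative sketch only -- never compiled */
#define POOL_SLOTS	64

static boolean_t	pool_inuse[POOL_SLOTS];
static int		pool_waiter;
decl_simple_lock_data(,pool_lock)

static int
pool_alloc_slot(boolean_t can_fail)
{
	wait_result_t	wres;
	int		i;

	simple_lock(&pool_lock);
	for (;;) {
		for (i = 0; i < POOL_SLOTS; i++) {
			if (pool_inuse[i] == FALSE) {
				pool_inuse[i] = TRUE;
				simple_unlock(&pool_lock);
				return i;
			}
		}
		if (can_fail)
			break;		/* caller falls back to a slow path */
		/*
		 * No free slot and we can't fail: sleep until
		 * pool_free_slot() wakes us up, then rescan.
		 */
		pool_waiter++;
		wres = assert_wait((event_t)&pool_waiter, THREAD_UNINT);
		if (wres == THREAD_WAITING) {
			simple_unlock(&pool_lock);
			thread_block(THREAD_CONTINUE_NULL);
			simple_lock(&pool_lock);
		}
		pool_waiter--;
	}
	simple_unlock(&pool_lock);
	return -1;
}

static void
pool_free_slot(int i)
{
	simple_lock(&pool_lock);
	pool_inuse[i] = FALSE;
	if (pool_waiter)
		thread_wakeup(&pool_waiter);
	simple_unlock(&pool_lock);
}
#endif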
8619
8620 #if ENCRYPTED_SWAP
8621 /*
8622 * Encryption data.
8623 * "iv" is the "initial vector". Ideally, we want to
8624 * have a different one for each page we encrypt, so that
8625 * crackers can't find encryption patterns too easily.
8626 */
8627 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
8628 boolean_t swap_crypt_ctx_initialized = FALSE;
8629 uint32_t swap_crypt_key[8]; /* big enough for a 256 key */
8630 aes_ctx swap_crypt_ctx;
8631 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
8632
8633 #if DEBUG
8634 boolean_t swap_crypt_ctx_tested = FALSE;
8635 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
8636 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
8637 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
8638 #endif /* DEBUG */
8639
8640 /*
8641 * Initialize the encryption context: key and key size.
8642 */
8643 void swap_crypt_ctx_initialize(void); /* forward */
8644 void
8645 swap_crypt_ctx_initialize(void)
8646 {
8647 unsigned int i;
8648
8649 /*
8650 * No need for locking to protect swap_crypt_ctx_initialized
8651 * because the first use of encryption will come from the
8652 * pageout thread (we won't pagein before there's been a pageout)
8653 * and there's only one pageout thread.
8654 */
8655 if (swap_crypt_ctx_initialized == FALSE) {
8656 for (i = 0;
8657 i < (sizeof (swap_crypt_key) /
8658 sizeof (swap_crypt_key[0]));
8659 i++) {
8660 swap_crypt_key[i] = random();
8661 }
8662 aes_encrypt_key((const unsigned char *) swap_crypt_key,
8663 SWAP_CRYPT_AES_KEY_SIZE,
8664 &swap_crypt_ctx.encrypt);
8665 aes_decrypt_key((const unsigned char *) swap_crypt_key,
8666 SWAP_CRYPT_AES_KEY_SIZE,
8667 &swap_crypt_ctx.decrypt);
8668 swap_crypt_ctx_initialized = TRUE;
8669 }
8670
8671 #if DEBUG
8672 /*
8673 * Validate the encryption algorithms.
8674 */
8675 if (swap_crypt_ctx_tested == FALSE) {
8676 /* initialize */
8677 for (i = 0; i < 4096; i++) {
8678 swap_crypt_test_page_ref[i] = (char) i;
8679 }
8680 /* encrypt */
8681 aes_encrypt_cbc(swap_crypt_test_page_ref,
8682 swap_crypt_null_iv,
8683 PAGE_SIZE / AES_BLOCK_SIZE,
8684 swap_crypt_test_page_encrypt,
8685 &swap_crypt_ctx.encrypt);
8686 /* decrypt */
8687 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
8688 swap_crypt_null_iv,
8689 PAGE_SIZE / AES_BLOCK_SIZE,
8690 swap_crypt_test_page_decrypt,
8691 &swap_crypt_ctx.decrypt);
8692 /* compare result with original */
8693 for (i = 0; i < 4096; i ++) {
8694 if (swap_crypt_test_page_decrypt[i] !=
8695 swap_crypt_test_page_ref[i]) {
8696 panic("encryption test failed");
8697 }
8698 }
8699
8700 /* encrypt again */
8701 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
8702 swap_crypt_null_iv,
8703 PAGE_SIZE / AES_BLOCK_SIZE,
8704 swap_crypt_test_page_decrypt,
8705 &swap_crypt_ctx.encrypt);
8706 /* decrypt in place */
8707 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
8708 swap_crypt_null_iv,
8709 PAGE_SIZE / AES_BLOCK_SIZE,
8710 swap_crypt_test_page_decrypt,
8711 &swap_crypt_ctx.decrypt);
8712 for (i = 0; i < 4096; i ++) {
8713 if (swap_crypt_test_page_decrypt[i] !=
8714 swap_crypt_test_page_ref[i]) {
8715 panic("in place encryption test failed");
8716 }
8717 }
8718
8719 swap_crypt_ctx_tested = TRUE;
8720 }
8721 #endif /* DEBUG */
8722 }
8723
8724 /*
8725 * ENCRYPTED SWAP:
8726 * vm_page_encrypt:
8727 * Encrypt the given page, for secure paging.
8728 * The page might already be mapped at kernel virtual
8729 * address "kernel_mapping_offset". Otherwise, we need
8730 * to map it.
8731 *
8732 * Context:
8733 * The page's object is locked, but this lock will be released
8734 * and re-acquired.
8735 * The page is busy and not accessible by users (not entered in any pmap).
8736 */
8737 void
8738 vm_page_encrypt(
8739 vm_page_t page,
8740 vm_map_offset_t kernel_mapping_offset)
8741 {
8742 kern_return_t kr;
8743 vm_map_size_t kernel_mapping_size;
8744 boolean_t kernel_mapping_needs_unmap;
8745 vm_offset_t kernel_vaddr;
8746 union {
8747 unsigned char aes_iv[AES_BLOCK_SIZE];
8748 struct {
8749 memory_object_t pager_object;
8750 vm_object_offset_t paging_offset;
8751 } vm;
8752 } encrypt_iv;
8753
8754 if (! vm_pages_encrypted) {
8755 vm_pages_encrypted = TRUE;
8756 }
8757
8758 assert(page->busy);
8759
8760 if (page->encrypted) {
8761 /*
8762 * Already encrypted: no need to do it again.
8763 */
8764 vm_page_encrypt_already_encrypted_counter++;
8765 return;
8766 }
8767 assert(page->dirty || page->precious);
8768
8769 ASSERT_PAGE_DECRYPTED(page);
8770
8771 /*
8772 * Take a paging-in-progress reference to keep the object
8773 * alive even if we have to unlock it (in vm_paging_map_object()
8774 * for example)...
8775 */
8776 vm_object_paging_begin(page->object);
8777
8778 if (kernel_mapping_offset == 0) {
8779 /*
8780 * The page hasn't already been mapped in kernel space
8781 * by the caller. Map it now, so that we can access
8782 * its contents and encrypt them.
8783 */
8784 kernel_mapping_size = PAGE_SIZE;
8785 kernel_mapping_needs_unmap = FALSE;
8786 kr = vm_paging_map_object(page,
8787 page->object,
8788 page->offset,
8789 VM_PROT_READ | VM_PROT_WRITE,
8790 FALSE,
8791 &kernel_mapping_size,
8792 &kernel_mapping_offset,
8793 &kernel_mapping_needs_unmap);
8794 if (kr != KERN_SUCCESS) {
8795 panic("vm_page_encrypt: "
8796 "could not map page in kernel: 0x%x\n",
8797 kr);
8798 }
8799 } else {
8800 kernel_mapping_size = 0;
8801 kernel_mapping_needs_unmap = FALSE;
8802 }
8803 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
8804
8805 if (swap_crypt_ctx_initialized == FALSE) {
8806 swap_crypt_ctx_initialize();
8807 }
8808 assert(swap_crypt_ctx_initialized);
8809
8810 /*
8811 * Prepare an "initial vector" for the encryption.
8812 * We use the "pager" and the "paging_offset" for that
8813 * page to obfuscate the encrypted data a bit more and
8814 * prevent crackers from finding patterns that they could
8815 * use to break the key.
8816 */
8817 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
8818 encrypt_iv.vm.pager_object = page->object->pager;
8819 encrypt_iv.vm.paging_offset =
8820 page->object->paging_offset + page->offset;
8821
8822 /* encrypt the "initial vector" */
8823 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
8824 swap_crypt_null_iv,
8825 1,
8826 &encrypt_iv.aes_iv[0],
8827 &swap_crypt_ctx.encrypt);
8828
8829 /*
8830 * Encrypt the page.
8831 */
8832 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
8833 &encrypt_iv.aes_iv[0],
8834 PAGE_SIZE / AES_BLOCK_SIZE,
8835 (unsigned char *) kernel_vaddr,
8836 &swap_crypt_ctx.encrypt);
8837
8838 vm_page_encrypt_counter++;
8839
8840 /*
8841 * Unmap the page from the kernel's address space,
8842 * if we had to map it ourselves. Otherwise, let
8843 * the caller undo the mapping if needed.
8844 */
8845 if (kernel_mapping_needs_unmap) {
8846 vm_paging_unmap_object(page->object,
8847 kernel_mapping_offset,
8848 kernel_mapping_offset + kernel_mapping_size);
8849 }
8850
8851 /*
8852 * Clear the "reference" and "modified" bits.
8853 * This should clean up any impact the encryption had
8854 * on them.
8855 * The page was kept busy and disconnected from all pmaps,
8856 * so it can't have been referenced or modified from user
8857 * space.
8858 * The software bits will be reset later after the I/O
8859 * has completed (in upl_commit_range()).
8860 */
8861 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
8862
8863 page->encrypted = TRUE;
8864
8865 vm_object_paging_end(page->object);
8866 }
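/*
 * A minimal sketch of the per-page "initial vector" derivation used by
 * vm_page_encrypt()/vm_page_decrypt() above: the pager and paging offset
 * that identify the page are packed into one AES block, and that block is
 * itself encrypted (with the constant null IV) to produce the IV applied to
 * the page's data.  make_page_iv is a hypothetical name for illustration;
 * it relies on the same aes_encrypt_cbc(), swap_crypt_null_iv and
 * swap_crypt_ctx already used in this file.
 */
#if 0 /* illustrative sketch only -- never compiled */
static void
make_page_iv(
	memory_object_t		pager,
	vm_object_offset_t	paging_offset,
	unsigned char		iv_out[AES_BLOCK_SIZE])
{
	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} iv;

	/* seed the block with data unique to this page */
	bzero(&iv.aes_iv[0], sizeof (iv.aes_iv));
	iv.vm.pager_object = pager;
	iv.vm.paging_offset = paging_offset;

	/* one CBC pass over a single block turns the seed into the IV */
	aes_encrypt_cbc((const unsigned char *) &iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			iv_out,
			&swap_crypt_ctx.encrypt);
}
#endif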
8867
8868 /*
8869 * ENCRYPTED SWAP:
8870 * vm_page_decrypt:
8871 * Decrypt the given page.
8872 * The page might already be mapped at kernel virtual
8873 * address "kernel_mapping_offset". Otherwise, we need
8874 * to map it.
8875 *
8876 * Context:
8877 * The page's VM object is locked but will be unlocked and relocked.
8878 * The page is busy and not accessible by users (not entered in any pmap).
8879 */
8880 void
8881 vm_page_decrypt(
8882 vm_page_t page,
8883 vm_map_offset_t kernel_mapping_offset)
8884 {
8885 kern_return_t kr;
8886 vm_map_size_t kernel_mapping_size;
8887 vm_offset_t kernel_vaddr;
8888 boolean_t kernel_mapping_needs_unmap;
8889 union {
8890 unsigned char aes_iv[AES_BLOCK_SIZE];
8891 struct {
8892 memory_object_t pager_object;
8893 vm_object_offset_t paging_offset;
8894 } vm;
8895 } decrypt_iv;
8896 boolean_t was_dirty;
8897
8898 assert(page->busy);
8899 assert(page->encrypted);
8900
8901 was_dirty = page->dirty;
8902
8903 /*
8904 * Take a paging-in-progress reference to keep the object
8905 * alive even if we have to unlock it (in vm_paging_map_object()
8906 * for example)...
8907 */
8908 vm_object_paging_begin(page->object);
8909
8910 if (kernel_mapping_offset == 0) {
8911 /*
8912 * The page hasn't already been mapped in kernel space
8913 * by the caller. Map it now, so that we can access
8914 * its contents and decrypt them.
8915 */
8916 kernel_mapping_size = PAGE_SIZE;
8917 kernel_mapping_needs_unmap = FALSE;
8918 kr = vm_paging_map_object(page,
8919 page->object,
8920 page->offset,
8921 VM_PROT_READ | VM_PROT_WRITE,
8922 FALSE,
8923 &kernel_mapping_size,
8924 &kernel_mapping_offset,
8925 &kernel_mapping_needs_unmap);
8926 if (kr != KERN_SUCCESS) {
8927 panic("vm_page_decrypt: "
8928 "could not map page in kernel: 0x%x\n",
8929 kr);
8930 }
8931 } else {
8932 kernel_mapping_size = 0;
8933 kernel_mapping_needs_unmap = FALSE;
8934 }
8935 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
8936
8937 assert(swap_crypt_ctx_initialized);
8938
8939 /*
8940 * Prepare an "initial vector" for the decryption.
8941 * It has to be the same as the "initial vector" we
8942 * used to encrypt that page.
8943 */
8944 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
8945 decrypt_iv.vm.pager_object = page->object->pager;
8946 decrypt_iv.vm.paging_offset =
8947 page->object->paging_offset + page->offset;
8948
8949 /* encrypt the "initial vector" */
8950 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
8951 swap_crypt_null_iv,
8952 1,
8953 &decrypt_iv.aes_iv[0],
8954 &swap_crypt_ctx.encrypt);
8955
8956 /*
8957 * Decrypt the page.
8958 */
8959 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
8960 &decrypt_iv.aes_iv[0],
8961 PAGE_SIZE / AES_BLOCK_SIZE,
8962 (unsigned char *) kernel_vaddr,
8963 &swap_crypt_ctx.decrypt);
8964 vm_page_decrypt_counter++;
8965
8966 /*
8967 * Unmap the page from the kernel's address space,
8968 * if we had to map it ourselves. Otherwise, let
8969 * the caller undo the mapping if needed.
8970 */
8971 if (kernel_mapping_needs_unmap) {
8972 vm_paging_unmap_object(page->object,
8973 kernel_vaddr,
8974 kernel_vaddr + PAGE_SIZE);
8975 }
8976
8977 if (was_dirty) {
8978 /*
8979 * The pager did not specify that the page would be
8980 * clean when it got paged in, so let's not clean it here
8981 * either.
8982 */
8983 } else {
8984 /*
8985 * After decryption, the page is actually still clean.
8986 * It was encrypted as part of paging, which "cleans"
8987 * the "dirty" pages.
8988 * No one could access it after it was encrypted
8989 * and the decryption doesn't count.
8990 */
8991 page->dirty = FALSE;
8992 assert (page->cs_validated == FALSE);
8993 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
8994 }
8995 page->encrypted = FALSE;
8996
8997 /*
8998 * We've just modified the page's contents via the data cache and part
8999 * of the new contents might still be in the cache and not yet in RAM.
9000 * Since the page is now available and might get gathered in a UPL to
9001 * be part of a DMA transfer from a driver that expects the memory to
9002 * be coherent at this point, we have to flush the data cache.
9003 */
9004 pmap_sync_page_attributes_phys(page->phys_page);
9005 /*
9006 * Since the page is not mapped yet, some code might assume that it
9007 * doesn't need to invalidate the instruction cache when writing to
9008 * that page. That code relies on "pmapped" being FALSE, so that the
9009 * caches get synchronized when the page is first mapped.
9010 */
9011 assert(pmap_verify_free(page->phys_page));
9012 page->pmapped = FALSE;
9013 page->wpmapped = FALSE;
9014
9015 vm_object_paging_end(page->object);
9016 }
9017
9018 #if DEVELOPMENT || DEBUG
9019 unsigned long upl_encrypt_upls = 0;
9020 unsigned long upl_encrypt_pages = 0;
9021 #endif
9022
9023 /*
9024 * ENCRYPTED SWAP:
9025 *
9026 * upl_encrypt:
9027 * Encrypts all the pages in the UPL, within the specified range.
9028 *
9029 */
9030 void
9031 upl_encrypt(
9032 upl_t upl,
9033 upl_offset_t crypt_offset,
9034 upl_size_t crypt_size)
9035 {
9036 upl_size_t upl_size, subupl_size=crypt_size;
9037 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
9038 vm_object_t upl_object;
9039 vm_object_offset_t upl_offset;
9040 vm_page_t page;
9041 vm_object_t shadow_object;
9042 vm_object_offset_t shadow_offset;
9043 vm_object_offset_t paging_offset;
9044 vm_object_offset_t base_offset;
9045 int isVectorUPL = 0;
9046 upl_t vector_upl = NULL;
9047
9048 if((isVectorUPL = vector_upl_is_valid(upl)))
9049 vector_upl = upl;
9050
9051 process_upl_to_encrypt:
9052 if(isVectorUPL) {
9053 crypt_size = subupl_size;
9054 crypt_offset = subupl_offset;
9055 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
9056 if(upl == NULL)
9057 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
9058 subupl_size -= crypt_size;
9059 subupl_offset += crypt_size;
9060 }
9061
9062 #if DEVELOPMENT || DEBUG
9063 upl_encrypt_upls++;
9064 upl_encrypt_pages += crypt_size / PAGE_SIZE;
9065 #endif
9066 upl_object = upl->map_object;
9067 upl_offset = upl->offset;
9068 upl_size = upl->size;
9069
9070 vm_object_lock(upl_object);
9071
9072 /*
9073 * Find the VM object that contains the actual pages.
9074 */
9075 if (upl_object->pageout) {
9076 shadow_object = upl_object->shadow;
9077 /*
9078 * The offset in the shadow object is actually also
9079 * accounted for in upl->offset. It possibly shouldn't be
9080 * this way, but for now don't account for it twice.
9081 */
9082 shadow_offset = 0;
9083 assert(upl_object->paging_offset == 0); /* XXX ? */
9084 vm_object_lock(shadow_object);
9085 } else {
9086 shadow_object = upl_object;
9087 shadow_offset = 0;
9088 }
9089
9090 paging_offset = shadow_object->paging_offset;
9091 vm_object_paging_begin(shadow_object);
9092
9093 if (shadow_object != upl_object)
9094 vm_object_unlock(upl_object);
9095
9096
9097 base_offset = shadow_offset;
9098 base_offset += upl_offset;
9099 base_offset += crypt_offset;
9100 base_offset -= paging_offset;
9101
9102 assert(crypt_offset + crypt_size <= upl_size);
9103
9104 for (offset_in_upl = 0;
9105 offset_in_upl < crypt_size;
9106 offset_in_upl += PAGE_SIZE) {
9107 page = vm_page_lookup(shadow_object,
9108 base_offset + offset_in_upl);
9109 if (page == VM_PAGE_NULL) {
9110 panic("upl_encrypt: "
9111 "no page for (obj=%p,off=0x%llx+0x%x)!\n",
9112 shadow_object,
9113 base_offset,
9114 offset_in_upl);
9115 }
9116 /*
9117 * Disconnect the page from all pmaps, so that nobody can
9118 * access it while it's encrypted. After that point, all
9119 * accesses to this page will cause a page fault and block
9120 * while the page is busy being encrypted. After the
9121 * encryption completes, any access will cause a
9122 * page fault and the page gets decrypted at that time.
9123 */
9124 pmap_disconnect(page->phys_page);
9125 vm_page_encrypt(page, 0);
9126
9127 if (vm_object_lock_avoid(shadow_object)) {
9128 /*
9129 * Give vm_pageout_scan() a chance to convert more
9130 * pages from "clean-in-place" to "clean-and-free",
9131 * if it's interested in the same pages we selected
9132 * in this cluster.
9133 */
9134 vm_object_unlock(shadow_object);
9135 mutex_pause(2);
9136 vm_object_lock(shadow_object);
9137 }
9138 }
9139
9140 vm_object_paging_end(shadow_object);
9141 vm_object_unlock(shadow_object);
9142
9143 if(isVectorUPL && subupl_size)
9144 goto process_upl_to_encrypt;
9145 }
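/*
 * A minimal sketch of the offset arithmetic upl_encrypt() performs above to
 * translate a UPL-relative offset into an offset within the shadow object
 * that actually holds the pages: the shadow offset, the UPL's own offset and
 * the requested offset inside the UPL are added, and the shadow object's
 * paging_offset is subtracted.  upl_offset_to_object_offset is a
 * hypothetical helper name used only for illustration.
 */
#if 0 /* illustrative sketch only -- never compiled */
static vm_object_offset_t
upl_offset_to_object_offset(
	vm_object_offset_t	shadow_offset,	/* 0 in the pageout case above */
	vm_object_offset_t	upl_offset,	/* upl->offset */
	upl_offset_t		crypt_offset,	/* offset of the range in the UPL */
	vm_object_offset_t	paging_offset)	/* shadow_object->paging_offset */
{
	return shadow_offset + upl_offset + crypt_offset - paging_offset;
}
#endif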
9146
9147 #else /* ENCRYPTED_SWAP */
9148 void
9149 upl_encrypt(
9150 __unused upl_t upl,
9151 __unused upl_offset_t crypt_offset,
9152 __unused upl_size_t crypt_size)
9153 {
9154 }
9155
9156 void
9157 vm_page_encrypt(
9158 __unused vm_page_t page,
9159 __unused vm_map_offset_t kernel_mapping_offset)
9160 {
9161 }
9162
9163 void
9164 vm_page_decrypt(
9165 __unused vm_page_t page,
9166 __unused vm_map_offset_t kernel_mapping_offset)
9167 {
9168 }
9169
9170 #endif /* ENCRYPTED_SWAP */
9171
9172 /*
9173 * page->object must be locked
9174 */
9175 void
9176 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9177 {
9178 if (!queues_locked) {
9179 vm_page_lockspin_queues();
9180 }
9181
9182 /*
9183 * need to drop the laundry count...
9184 * we may also need to remove it
9185 * from the I/O paging queue...
9186 * vm_pageout_throttle_up handles both cases
9187 *
9188 * the laundry and pageout_queue flags are cleared...
9189 */
9190 vm_pageout_throttle_up(page);
9191
9192 vm_page_steal_pageout_page++;
9193
9194 if (!queues_locked) {
9195 vm_page_unlock_queues();
9196 }
9197 }
9198
9199 upl_t
9200 vector_upl_create(vm_offset_t upl_offset)
9201 {
9202 int vector_upl_size = sizeof(struct _vector_upl);
9203 int i=0;
9204 upl_t upl;
9205 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9206
9207 upl = upl_create(0,UPL_VECTOR,0);
9208 upl->vector_upl = vector_upl;
9209 upl->offset = upl_offset;
9210 vector_upl->size = 0;
9211 vector_upl->offset = upl_offset;
9212 vector_upl->invalid_upls=0;
9213 vector_upl->num_upls=0;
9214 vector_upl->pagelist = NULL;
9215
9216 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9217 vector_upl->upl_iostates[i].size = 0;
9218 vector_upl->upl_iostates[i].offset = 0;
9219
9220 }
9221 return upl;
9222 }
9223
9224 void
9225 vector_upl_deallocate(upl_t upl)
9226 {
9227 if(upl) {
9228 vector_upl_t vector_upl = upl->vector_upl;
9229 if(vector_upl) {
9230 if(vector_upl->invalid_upls != vector_upl->num_upls)
9231 panic("Deallocating non-empty Vectored UPL\n");
9232 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9233 vector_upl->invalid_upls=0;
9234 vector_upl->num_upls = 0;
9235 vector_upl->pagelist = NULL;
9236 vector_upl->size = 0;
9237 vector_upl->offset = 0;
9238 kfree(vector_upl, sizeof(struct _vector_upl));
9239 vector_upl = (vector_upl_t)0xfeedfeed;
9240 }
9241 else
9242 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9243 }
9244 else
9245 panic("vector_upl_deallocate was passed a NULL upl\n");
9246 }
9247
9248 boolean_t
9249 vector_upl_is_valid(upl_t upl)
9250 {
9251 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9252 vector_upl_t vector_upl = upl->vector_upl;
9253 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9254 return FALSE;
9255 else
9256 return TRUE;
9257 }
9258 return FALSE;
9259 }
9260
9261 boolean_t
9262 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9263 {
9264 if(vector_upl_is_valid(upl)) {
9265 vector_upl_t vector_upl = upl->vector_upl;
9266
9267 if(vector_upl) {
9268 if(subupl) {
9269 if(io_size) {
9270 if(io_size < PAGE_SIZE)
9271 io_size = PAGE_SIZE;
9272 subupl->vector_upl = (void*)vector_upl;
9273 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9274 vector_upl->size += io_size;
9275 upl->size += io_size;
9276 }
9277 else {
9278 uint32_t i=0,invalid_upls=0;
9279 for(i = 0; i < vector_upl->num_upls; i++) {
9280 if(vector_upl->upl_elems[i] == subupl)
9281 break;
9282 }
9283 if(i == vector_upl->num_upls)
9284 panic("Trying to remove sub-upl when none exists");
9285
9286 vector_upl->upl_elems[i] = NULL;
9287 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9288 if(invalid_upls == vector_upl->num_upls)
9289 return TRUE;
9290 else
9291 return FALSE;
9292 }
9293 }
9294 else
9295 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9296 }
9297 else
9298 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9299 }
9300 else
9301 panic("vector_upl_set_subupl was passed a NULL upl\n");
9302
9303 return FALSE;
9304 }
9305
9306 void
9307 vector_upl_set_pagelist(upl_t upl)
9308 {
9309 if(vector_upl_is_valid(upl)) {
9310 uint32_t i=0;
9311 vector_upl_t vector_upl = upl->vector_upl;
9312
9313 if(vector_upl) {
9314 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9315
9316 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9317
9318 for(i=0; i < vector_upl->num_upls; i++) {
9319 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9320 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9321 pagelist_size += cur_upl_pagelist_size;
9322 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9323 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9324 }
9325 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9326 }
9327 else
9328 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9329 }
9330 else
9331 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9332
9333 }
9334
9335 upl_t
9336 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9337 {
9338 if(vector_upl_is_valid(upl)) {
9339 vector_upl_t vector_upl = upl->vector_upl;
9340 if(vector_upl) {
9341 if(index < vector_upl->num_upls)
9342 return vector_upl->upl_elems[index];
9343 }
9344 else
9345 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9346 }
9347 return NULL;
9348 }
9349
9350 upl_t
9351 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9352 {
9353 if(vector_upl_is_valid(upl)) {
9354 uint32_t i=0;
9355 vector_upl_t vector_upl = upl->vector_upl;
9356
9357 if(vector_upl) {
9358 upl_t subupl = NULL;
9359 vector_upl_iostates_t subupl_state;
9360
9361 for(i=0; i < vector_upl->num_upls; i++) {
9362 subupl = vector_upl->upl_elems[i];
9363 subupl_state = vector_upl->upl_iostates[i];
9364 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9365 /* We could have been passed an offset/size pair that belongs
9366 * to a UPL element that has already been committed/aborted.
9367 * If so, return NULL.
9368 */
9369 if(subupl == NULL)
9370 return NULL;
9371 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9372 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9373 if(*upl_size > subupl_state.size)
9374 *upl_size = subupl_state.size;
9375 }
9376 if(*upl_offset >= subupl_state.offset)
9377 *upl_offset -= subupl_state.offset;
9378 else if(i)
9379 panic("Vector UPL offset miscalculation\n");
9380 return subupl;
9381 }
9382 }
9383 }
9384 else
9385 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9386 }
9387 return NULL;
9388 }
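/*
 * A minimal sketch of the clipping/rebasing that vector_upl_subupl_byoffset()
 * performs above once the matching sub-UPL iostate has been found: the
 * caller's (offset, size) pair, expressed in vector-UPL coordinates, is
 * trimmed so it does not run past the sub-UPL's recorded I/O range and is
 * then rebased to be relative to that sub-UPL.  clip_to_iostate is a
 * hypothetical helper name used only for illustration.
 */
#if 0 /* illustrative sketch only -- never compiled */
static void
clip_to_iostate(
	upl_offset_t	io_offset,	/* iostate.offset of the sub-UPL */
	upl_size_t	io_size,	/* iostate.size of the sub-UPL */
	upl_offset_t	*offset,	/* in: vector offset, out: sub-UPL offset */
	upl_size_t	*size)		/* in/out: request size, clipped */
{
	/* trim the request so it ends within the sub-UPL's I/O range */
	if ((io_offset + io_size) < (*offset + *size)) {
		*size = (io_offset + io_size) - *offset;
		if (*size > io_size)
			*size = io_size;
	}
	/* rebase the offset so it is relative to the sub-UPL */
	if (*offset >= io_offset)
		*offset -= io_offset;
}
#endif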
9389
9390 void
9391 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9392 {
9393 *v_upl_submap = NULL;
9394
9395 if(vector_upl_is_valid(upl)) {
9396 vector_upl_t vector_upl = upl->vector_upl;
9397 if(vector_upl) {
9398 *v_upl_submap = vector_upl->submap;
9399 *submap_dst_addr = vector_upl->submap_dst_addr;
9400 }
9401 else
9402 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9403 }
9404 else
9405 panic("vector_upl_get_submap was passed a null UPL\n");
9406 }
9407
9408 void
9409 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9410 {
9411 if(vector_upl_is_valid(upl)) {
9412 vector_upl_t vector_upl = upl->vector_upl;
9413 if(vector_upl) {
9414 vector_upl->submap = submap;
9415 vector_upl->submap_dst_addr = submap_dst_addr;
9416 }
9417 else
9418 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9419 }
9420 else
9421 panic("vector_upl_get_submap was passed a NULL UPL\n");
9422 }
9423
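/*
 * Record the I/O state (offset and size, rounded up to at least one
 * page) for the given sub-UPL; panics if the sub-UPL is not part of
 * this vectored UPL.
 */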
9424 void
9425 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9426 {
9427 if(vector_upl_is_valid(upl)) {
9428 uint32_t i = 0;
9429 vector_upl_t vector_upl = upl->vector_upl;
9430
9431 if(vector_upl) {
9432 for(i = 0; i < vector_upl->num_upls; i++) {
9433 if(vector_upl->upl_elems[i] == subupl)
9434 break;
9435 }
9436
9437 if(i == vector_upl->num_upls)
9438 panic("setting sub-upl iostate when none exists");
9439
9440 vector_upl->upl_iostates[i].offset = offset;
9441 if(size < PAGE_SIZE)
9442 size = PAGE_SIZE;
9443 vector_upl->upl_iostates[i].size = size;
9444 }
9445 else
9446 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9447 }
9448 else
9449 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9450 }
9451
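/*
 * Return the I/O state previously recorded for the given sub-UPL;
 * panics if the sub-UPL is not part of this vectored UPL.
 */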
9452 void
9453 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9454 {
9455 if(vector_upl_is_valid(upl)) {
9456 uint32_t i = 0;
9457 vector_upl_t vector_upl = upl->vector_upl;
9458
9459 if(vector_upl) {
9460 for(i = 0; i < vector_upl->num_upls; i++) {
9461 if(vector_upl->upl_elems[i] == subupl)
9462 break;
9463 }
9464
9465 if(i == vector_upl->num_upls)
9466 panic("getting sub-upl iostate when none exists");
9467
9468 *offset = vector_upl->upl_iostates[i].offset;
9469 *size = vector_upl->upl_iostates[i].size;
9470 }
9471 else
9472 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9473 }
9474 else
9475 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9476 }
9477
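/*
 * Return the I/O state of the sub-UPL at the given index, or zeroes if
 * the index is out of range.
 */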
9478 void
9479 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9480 {
9481 if(vector_upl_is_valid(upl)) {
9482 vector_upl_t vector_upl = upl->vector_upl;
9483 if(vector_upl) {
9484 if(index < vector_upl->num_upls) {
9485 *offset = vector_upl->upl_iostates[index].offset;
9486 *size = vector_upl->upl_iostates[index].size;
9487 }
9488 else
9489 *offset = *size = 0;
9490 }
9491 else
9492 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9493 }
9494 else
9495 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9496 }
9497
9498 upl_page_info_t *
9499 upl_get_internal_vectorupl_pagelist(upl_t upl)
9500 {
9501 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9502 }
9503
9504 void *
9505 upl_get_internal_vectorupl(upl_t upl)
9506 {
9507 return upl->vector_upl;
9508 }
9509
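/*
 * The internal page list of a UPL is laid out immediately after the
 * upl structure itself; return that offset.
 */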
9510 vm_size_t
9511 upl_get_internal_pagelist_offset(void)
9512 {
9513 return sizeof(struct upl);
9514 }
9515
9516 void
9517 upl_clear_dirty(
9518 upl_t upl,
9519 boolean_t value)
9520 {
9521 if (value) {
9522 upl->flags |= UPL_CLEAR_DIRTY;
9523 } else {
9524 upl->flags &= ~UPL_CLEAR_DIRTY;
9525 }
9526 }
9527
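/*
 * Take or drop an external reference on the UPL under the UPL lock;
 * dropping a reference when none is outstanding is a fatal error.
 */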
9528 void
9529 upl_set_referenced(
9530 upl_t upl,
9531 boolean_t value)
9532 {
9533 upl_lock(upl);
9534 if (value) {
9535 upl->ext_ref_count++;
9536 } else {
9537 if (!upl->ext_ref_count) {
9538 panic("upl_set_referenced not %p\n", upl);
9539 }
9540 upl->ext_ref_count--;
9541 }
9542 upl_unlock(upl);
9543 }
9544
9545 #if CONFIG_IOSCHED
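/*
 * For UPLs that support I/O expediting, record the disk block number
 * and I/O size for every page in the given range so the I/O scheduler
 * can reprioritize the corresponding requests.
 */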
9546 void
9547 upl_set_blkno(
9548 upl_t upl,
9549 vm_offset_t upl_offset,
9550 int io_size,
9551 int64_t blkno)
9552 {
9553 int i,j;
9554 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9555 return;
9556
9557 assert(upl->upl_reprio_info != 0);
9558 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9559 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9560 }
9561 }
9562 #endif
9563
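/*
 * Return TRUE if the page belongs to the slid object, has not been slid
 * yet, and falls within the range covered by the object's slide info.
 */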
9564 boolean_t
9565 vm_page_is_slideable(vm_page_t m)
9566 {
9567 boolean_t result = FALSE;
9568 vm_shared_region_slide_info_t si;
9569
9570 vm_object_lock_assert_held(m->object);
9571
9572 /* make sure our page belongs to the one object allowed to do this */
9573 if (!m->object->object_slid) {
9574 goto done;
9575 }
9576
9577 si = m->object->vo_slide_info;
9578 if (si == NULL) {
9579 goto done;
9580 }
9581
9582 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
9583 result = TRUE;
9584 }
9585
9586 done:
9587 return result;
9588 }
9589
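/*
 * Apply shared-region slide relocations to the page's contents, mapping
 * the page into the kernel if the caller has not already done so.  On
 * success the page is marked slid; on failure it is marked in error and
 * the error counter is bumped.
 */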
9590 int vm_page_slide_counter = 0;
9591 int vm_page_slide_errors = 0;
9592 kern_return_t
9593 vm_page_slide(
9594 vm_page_t page,
9595 vm_map_offset_t kernel_mapping_offset)
9596 {
9597 kern_return_t kr;
9598 vm_map_size_t kernel_mapping_size;
9599 boolean_t kernel_mapping_needs_unmap;
9600 vm_offset_t kernel_vaddr;
9601 uint32_t pageIndex = 0;
9602
9603 assert(!page->slid);
9604 assert(page->object->object_slid);
9605 vm_object_lock_assert_exclusive(page->object);
9606
9607 if (page->error)
9608 return KERN_FAILURE;
9609
9610 /*
9611 * Take a paging-in-progress reference to keep the object
9612 * alive even if we have to unlock it (in vm_paging_map_object()
9613 * for example)...
9614 */
9615 vm_object_paging_begin(page->object);
9616
9617 if (kernel_mapping_offset == 0) {
9618 /*
9619 * The page hasn't already been mapped in kernel space
9620 * by the caller. Map it now, so that we can access
9621 * its contents and slide them.
9622 */
9623 kernel_mapping_size = PAGE_SIZE;
9624 kernel_mapping_needs_unmap = FALSE;
9625 kr = vm_paging_map_object(page,
9626 page->object,
9627 page->offset,
9628 VM_PROT_READ | VM_PROT_WRITE,
9629 FALSE,
9630 &kernel_mapping_size,
9631 &kernel_mapping_offset,
9632 &kernel_mapping_needs_unmap);
9633 if (kr != KERN_SUCCESS) {
9634 panic("vm_page_slide: "
9635 "could not map page in kernel: 0x%x\n",
9636 kr);
9637 }
9638 } else {
9639 kernel_mapping_size = 0;
9640 kernel_mapping_needs_unmap = FALSE;
9641 }
9642 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9643
9644 /*
9645 * Slide the pointers on the page.
9646 */
9647
9648 /* assert that slide_file_info.start/end are page-aligned? */
9649
9650 assert(!page->slid);
9651 assert(page->object->object_slid);
9652
9653 /* on some platforms this is an extern int, on others it's a cpp macro */
9654 __unreachable_ok_push
9655 /* TODO: Consider this */
9656 if (!TEST_PAGE_SIZE_4K) {
9657 for (int i = 0; i < 4; i++) {
9658 pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/0x1000);
9659 kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr + (0x1000*i), pageIndex + i);
9660 }
9661 } else {
9662 pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE);
9663 kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex);
9664 }
9665 __unreachable_ok_pop
9666
9667 vm_page_slide_counter++;
9668
9669 /*
9670 * Unmap the page from the kernel's address space.
9671 */
9672 if (kernel_mapping_needs_unmap) {
9673 vm_paging_unmap_object(page->object,
9674 kernel_vaddr,
9675 kernel_vaddr + PAGE_SIZE);
9676 }
9677
9678 page->dirty = FALSE;
9679 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9680
9681 if (kr != KERN_SUCCESS || cs_debug > 1) {
9682 printf("vm_page_slide(%p): "
9683 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
9684 page,
9685 page->object, page->offset,
9686 page->object->pager,
9687 page->offset + page->object->paging_offset);
9688 }
9689
9690 if (kr == KERN_SUCCESS) {
9691 page->slid = TRUE;
9692 } else {
9693 page->error = TRUE;
9694 vm_page_slide_errors++;
9695 }
9696
9697 vm_object_paging_end(page->object);
9698
9699 return kr;
9700 }
9701
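/*
 * Emit a kdebug VM-pressure tracepoint carrying the current page queue
 * counts, but only when VM debug events are enabled.
 */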
9702 inline void memoryshot(unsigned int event, unsigned int control)
9703 {
9704 if (vm_debug_events) {
9705 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9706 vm_page_active_count, vm_page_inactive_count,
9707 vm_page_free_count, vm_page_speculative_count,
9708 vm_page_throttled_count);
9709 } else {
9710 (void) event;
9711 (void) control;
9712 }
9713
9714 }
9715
9716 #ifdef MACH_BSD
9717
9718 boolean_t upl_device_page(upl_page_info_t *upl)
9719 {
9720 return(UPL_DEVICE_PAGE(upl));
9721 }
9722 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9723 {
9724 return(UPL_PAGE_PRESENT(upl, index));
9725 }
9726 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9727 {
9728 return(UPL_SPECULATIVE_PAGE(upl, index));
9729 }
9730 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9731 {
9732 return(UPL_DIRTY_PAGE(upl, index));
9733 }
9734 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9735 {
9736 return(UPL_VALID_PAGE(upl, index));
9737 }
9738 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9739 {
9740 return(UPL_PHYS_PAGE(upl, index));
9741 }
9742
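/*
 * Debugging aid: walk the inactive, throttled, anonymous and active
 * page queues and print how many dirty, pageout and precious pages
 * each group contains.
 */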
9743 void
9744 vm_countdirtypages(void)
9745 {
9746 vm_page_t m;
9747 int dpages;
9748 int pgopages;
9749 int precpages;
9750
9751
9752 dpages=0;
9753 pgopages=0;
9754 precpages=0;
9755
9756 vm_page_lock_queues();
9757 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
9758 do {
9759 if (m ==(vm_page_t )0) break;
9760
9761 if(m->dirty) dpages++;
9762 if(m->pageout) pgopages++;
9763 if(m->precious) precpages++;
9764
9765 assert(m->object != kernel_object);
9766 m = (vm_page_t) queue_next(&m->pageq);
9767 if (m ==(vm_page_t )0) break;
9768
9769 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
9770 vm_page_unlock_queues();
9771
9772 vm_page_lock_queues();
9773 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
9774 do {
9775 if (m ==(vm_page_t )0) break;
9776
9777 dpages++;
9778 assert(m->dirty);
9779 assert(!m->pageout);
9780 assert(m->object != kernel_object);
9781 m = (vm_page_t) queue_next(&m->pageq);
9782 if (m ==(vm_page_t )0) break;
9783
9784 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m));
9785 vm_page_unlock_queues();
9786
9787 vm_page_lock_queues();
9788 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
9789 do {
9790 if (m ==(vm_page_t )0) break;
9791
9792 if(m->dirty) dpages++;
9793 if(m->pageout) pgopages++;
9794 if(m->precious) precpages++;
9795
9796 assert(m->object != kernel_object);
9797 m = (vm_page_t) queue_next(&m->pageq);
9798 if (m ==(vm_page_t )0) break;
9799
9800 } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m));
9801 vm_page_unlock_queues();
9802
9803 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9804
9805 dpages=0;
9806 pgopages=0;
9807 precpages=0;
9808
9809 vm_page_lock_queues();
9810 m = (vm_page_t) queue_first(&vm_page_queue_active);
9811
9812 do {
9813 if(m == (vm_page_t )0) break;
9814 if(m->dirty) dpages++;
9815 if(m->pageout) pgopages++;
9816 if(m->precious) precpages++;
9817
9818 assert(m->object != kernel_object);
9819 m = (vm_page_t) queue_next(&m->pageq);
9820 if(m == (vm_page_t )0) break;
9821
9822 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
9823 vm_page_unlock_queues();
9824
9825 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9826
9827 }
9828 #endif /* MACH_BSD */
9829
9830 ppnum_t upl_get_highest_page(
9831 upl_t upl)
9832 {
9833 return upl->highest_page;
9834 }
9835
9836 upl_size_t upl_get_size(
9837 upl_t upl)
9838 {
9839 return upl->size;
9840 }
9841
9842 #if UPL_DEBUG
9843 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9844 {
9845 upl->ubc_alias1 = alias1;
9846 upl->ubc_alias2 = alias2;
9847 return KERN_SUCCESS;
9848 }
9849 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9850 {
9851 if(al)
9852 *al = upl->ubc_alias1;
9853 if(al2)
9854 *al2 = upl->ubc_alias2;
9855 return KERN_SUCCESS;
9856 }
9857 #endif /* UPL_DEBUG */
9858
9859 #if VM_PRESSURE_EVENTS
9860 /*
9861 * Upward trajectory.
9862 */
9863 extern boolean_t vm_compressor_low_on_space(void);
9864
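/*
 * Normal -> warning: with the default pager or freezer active, report
 * pressure when available pages drop below the pressure threshold and
 * there are no frozen processes and too few suspended processes to kill;
 * otherwise compare non-compressed memory against the compressor
 * compaction threshold.
 */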
9865 boolean_t
9866 VM_PRESSURE_NORMAL_TO_WARNING(void) {
9867
9868 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
9869
9870 /* Available pages below our threshold */
9871 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9872 /* No frozen processes to kill */
9873 if (memorystatus_frozen_count == 0) {
9874 /* Not enough suspended processes available. */
9875 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9876 return TRUE;
9877 }
9878 }
9879 }
9880 return FALSE;
9881
9882 } else {
9883 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
9884 }
9885 }
9886
9887 boolean_t
9888 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9889
9890 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
9891 /* Available pages below our threshold */
9892 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9893 return TRUE;
9894 }
9895 return FALSE;
9896 } else {
9897 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9898 }
9899 }
9900
9901 /*
9902 * Downward trajectory.
9903 */
9904 boolean_t
9905 VM_PRESSURE_WARNING_TO_NORMAL(void) {
9906
9907 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
9908 /* Available pages above our threshold */
9909 unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
9910 if (memorystatus_available_pages > target_threshold) {
9911 return TRUE;
9912 }
9913 return FALSE;
9914 } else {
9915 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
9916 }
9917 }
9918
9919 boolean_t
9920 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9921
9922 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
9923 /* Available pages above our threshold */
9924 unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
9925 if (memorystatus_available_pages > target_threshold) {
9926 return TRUE;
9927 }
9928 return FALSE;
9929 } else {
9930 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9931 }
9932 }
9933 #endif /* VM_PRESSURE_EVENTS */
9934