1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93 #include <kern/thread_group.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #include <vm/pmap.h>
99 #include <vm/vm_compressor_pager.h>
100 #include <vm/vm_fault.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_protos.h> /* must be last */
106 #include <vm/memory_object.h>
107 #include <vm/vm_purgeable_internal.h>
108 #include <vm/vm_shared_region.h>
109 #include <vm/vm_compressor.h>
110
111 #include <san/kasan.h>
112
113 #if CONFIG_PHANTOM_CACHE
114 #include <vm/vm_phantom_cache.h>
115 #endif
116
117 extern int cs_debug;
118
119 #if UPL_DEBUG
120 #include <libkern/OSDebug.h>
121 #endif
122
123 extern void m_drain(void);
124
125 #if VM_PRESSURE_EVENTS
126 #if CONFIG_JETSAM
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 #else /* CONFIG_JETSAM */
131 extern uint64_t memorystatus_available_pages;
132 extern uint64_t memorystatus_available_pages_pressure;
133 extern uint64_t memorystatus_available_pages_critical;
134 #endif /* CONFIG_JETSAM */
135
136 extern unsigned int memorystatus_frozen_count;
137 extern unsigned int memorystatus_suspended_count;
138
139 extern vm_pressure_level_t memorystatus_vm_pressure_level;
140 int memorystatus_purge_on_warning = 2;
141 int memorystatus_purge_on_urgent = 5;
142 int memorystatus_purge_on_critical = 8;
143
144 void vm_pressure_response(void);
145 boolean_t vm_pressure_thread_running = FALSE;
146 extern void consider_vm_pressure_events(void);
147
148 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
149 #endif /* VM_PRESSURE_EVENTS */
150
151 boolean_t vm_pressure_changed = FALSE;
152
153 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
154 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
155 #endif
156
157 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
158 #ifdef CONFIG_EMBEDDED
159 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
160 #else
161 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
162 #endif
163 #endif
164
165 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
166 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
167 #endif
168
169 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
170 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
171 #endif
172
173 #ifndef VM_PAGE_LAUNDRY_MAX
174 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
175 #endif  /* VM_PAGE_LAUNDRY_MAX */
176
177 #ifndef VM_PAGEOUT_BURST_WAIT
178 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
179 #endif /* VM_PAGEOUT_BURST_WAIT */
180
181 #ifndef VM_PAGEOUT_EMPTY_WAIT
182 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
183 #endif /* VM_PAGEOUT_EMPTY_WAIT */
184
185 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
186 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
187 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
188
189 #ifndef VM_PAGEOUT_IDLE_WAIT
190 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
191 #endif /* VM_PAGEOUT_IDLE_WAIT */
192
193 #ifndef VM_PAGEOUT_SWAP_WAIT
194 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
195 #endif /* VM_PAGEOUT_SWAP_WAIT */
196
197 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
198 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
199 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
200
201 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
202 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
203 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
204
205 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
206 unsigned int vm_page_speculative_percentage = 5;
207
208 #ifndef VM_PAGE_SPECULATIVE_TARGET
209 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
210 #endif /* VM_PAGE_SPECULATIVE_TARGET */
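/*
 * Worked example: with the default vm_page_speculative_percentage of 5,
 * the divisor is 100 / 5 == 20, so
 *	VM_PAGE_SPECULATIVE_TARGET(1000000) == 1000000 / 20 == 50000 pages,
 * i.e. roughly 5% of the supplied page count.  Because the divisor is
 * computed with integer division, percentages that do not divide 100
 * evenly are effectively rounded in the divisor first.
 */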
211
212
213 /*
214 * To obtain a reasonable LRU approximation, the inactive queue
215 * needs to be large enough to give pages on it a chance to be
216 * referenced a second time. This macro defines the fraction
217 * of active+inactive pages that should be inactive.
218 * The pageout daemon uses it to update vm_page_inactive_target.
219 *
220 * If vm_page_free_count falls below vm_page_free_target and
221 * vm_page_inactive_count is below vm_page_inactive_target,
222 * then the pageout daemon starts running.
223 */
224
225 #ifndef VM_PAGE_INACTIVE_TARGET
226 #ifdef CONFIG_EMBEDDED
227 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
228 #else
229 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
230 #endif
231 #endif /* VM_PAGE_INACTIVE_TARGET */
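/*
 * Worked example: with 900000 available pages, the inactive target is
 *	900000 / 3 == 300000 pages on CONFIG_EMBEDDED, or
 *	900000 / 2 == 450000 pages otherwise.
 */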
232
233 /*
234 * Once the pageout daemon starts running, it keeps going
235 * until vm_page_free_count meets or exceeds vm_page_free_target.
236 */
237
238 #ifndef VM_PAGE_FREE_TARGET
239 #ifdef CONFIG_EMBEDDED
240 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
241 #else
242 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
243 #endif
244 #endif /* VM_PAGE_FREE_TARGET */
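/*
 * Worked example: VM_PAGE_FREE_TARGET(100000) evaluates to
 *	15 + 100000 / 100 == 1015 pages on CONFIG_EMBEDDED, or
 *	15 + 100000 / 80  == 1265 pages otherwise.
 */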
245
246
247 /*
248 * The pageout daemon always starts running once vm_page_free_count
249 * falls below vm_page_free_min.
250 */
251
252 #ifndef VM_PAGE_FREE_MIN
253 #ifdef CONFIG_EMBEDDED
254 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
255 #else
256 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
257 #endif
258 #endif /* VM_PAGE_FREE_MIN */
259
260 #ifdef CONFIG_EMBEDDED
261 #define VM_PAGE_FREE_RESERVED_LIMIT 100
262 #define VM_PAGE_FREE_MIN_LIMIT 1500
263 #define VM_PAGE_FREE_TARGET_LIMIT 2000
264 #else
265 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
266 #define VM_PAGE_FREE_MIN_LIMIT 3500
267 #define VM_PAGE_FREE_TARGET_LIMIT 4000
268 #endif
269
270 /*
271 * When vm_page_free_count falls below vm_page_free_reserved,
272 * only vm-privileged threads can allocate pages. vm-privilege
273 * allows the pageout daemon and default pager (and any other
274 * associated threads needed for default pageout) to continue
275 * operation by dipping into the reserved pool of pages.
276 */
277
278 #ifndef VM_PAGE_FREE_RESERVED
279 #define VM_PAGE_FREE_RESERVED(n) \
280 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
281 #endif /* VM_PAGE_FREE_RESERVED */
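/*
 * Worked example: with VM_PAGE_LAUNDRY_MAX at its default of 128,
 *	VM_PAGE_FREE_RESERVED(n) == 6 * 128 + (n) == 768 + (n),
 * e.g. VM_PAGE_FREE_RESERVED(4) == 772 reserved pages.
 */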
282
283 /*
284 * When we dequeue pages from the inactive list, they are
285  * reactivated (i.e., put back on the active queue) if referenced.
286 * However, it is possible to starve the free list if other
287 * processors are referencing pages faster than we can turn off
288 * the referenced bit. So we limit the number of reactivations
289 * we will make per call of vm_pageout_scan().
290 */
291 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
292 #ifndef VM_PAGE_REACTIVATE_LIMIT
293 #ifdef CONFIG_EMBEDDED
294 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
295 #else
296 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
297 #endif
298 #endif /* VM_PAGE_REACTIVATE_LIMIT */
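/*
 * Worked example: with 600000 available pages,
 *	CONFIG_EMBEDDED: VM_PAGE_INACTIVE_TARGET(600000) / 2 == 200000 / 2 == 100000
 *	otherwise:       MAX(600000 / 20, 20000) == MAX(30000, 20000) == 30000
 */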
299 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
300
301
302 extern boolean_t hibernate_cleaning_in_progress;
303
304 /*
305  * Exported variable used to broadcast the activation of the pageout scan.
306 * Working Set uses this to throttle its use of pmap removes. In this
307 * way, code which runs within memory in an uncontested context does
308 * not keep encountering soft faults.
309 */
310
311 unsigned int vm_pageout_scan_event_counter = 0;
312
313 /*
314 * Forward declarations for internal routines.
315 */
316 struct cq {
317 struct vm_pageout_queue *q;
318 void *current_chead;
319 char *scratch_buf;
320 int id;
321 };
322
323 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
324
325
326 #if VM_PRESSURE_EVENTS
327 void vm_pressure_thread(void);
328
329 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
330 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
331
332 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
333 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
334 #endif
335 void vm_pageout_garbage_collect(int);
336 static void vm_pageout_iothread_external(void);
337 static void vm_pageout_iothread_internal(struct cq *cq);
338 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
339
340 extern void vm_pageout_continue(void);
341 extern void vm_pageout_scan(void);
342 void vm_tests(void); /* forward */
343
344 boolean_t vm_restricted_to_single_processor = FALSE;
345 #if !CONFIG_EMBEDDED
346 static boolean_t vm_pageout_waiter = FALSE;
347 static boolean_t vm_pageout_running = FALSE;
348 #endif /* !CONFIG_EMBEDDED */
349
350
351 static thread_t vm_pageout_external_iothread = THREAD_NULL;
352 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
353
354 unsigned int vm_pageout_reserved_internal = 0;
355 unsigned int vm_pageout_reserved_really = 0;
356
357 unsigned int vm_pageout_swap_wait = 0;
358 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
359 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
360 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
361 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
362 unsigned int vm_pageout_deadlock_relief = 0;
363 unsigned int vm_pageout_inactive_relief = 0;
364 unsigned int vm_pageout_burst_active_throttle = 0;
365 unsigned int vm_pageout_burst_inactive_throttle = 0;
366
367 int vm_upl_wait_for_pages = 0;
368
369
370 /*
371 * These variables record the pageout daemon's actions:
372 * how many pages it looks at and what happens to those pages.
373 * No locking needed because only one thread modifies the variables.
374 */
375
376 unsigned int vm_pageout_active = 0; /* debugging */
377 unsigned int vm_pageout_inactive = 0; /* debugging */
378 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
379 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
380 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
381 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
382 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
383 unsigned int vm_pageout_inactive_error = 0; /* debugging */
384 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
385 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
386 unsigned int vm_pageout_inactive_used = 0; /* debugging */
387 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
388 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
389 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
390 unsigned int vm_pageout_speculative_dirty = 0; /* debugging */
391
392 unsigned int vm_pageout_freed_from_cleaned = 0;
393 unsigned int vm_pageout_freed_from_speculative = 0;
394 unsigned int vm_pageout_freed_from_inactive_clean = 0;
395 unsigned int vm_pageout_freed_after_compression = 0;
396
397 extern uint32_t vm_compressor_pages_grabbed;
398 extern uint32_t c_segment_pages_compressed;
399
400 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
401
402 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
403 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
404 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
405 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
406 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
407 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
408 unsigned int vm_pageout_cleaned_busy = 0;
409 unsigned int vm_pageout_cleaned_nolock = 0;
410
411 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
412 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
413 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
414 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
415 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
416 unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
417 unsigned int vm_stat_discard = 0; /* debugging */
418 unsigned int vm_stat_discard_sent = 0; /* debugging */
419 unsigned int vm_stat_discard_failure = 0; /* debugging */
420 unsigned int vm_stat_discard_throttle = 0; /* debugging */
421 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
422 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
423 unsigned int vm_pageout_skipped_external = 0; /* debugging */
424
425 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
426 unsigned int vm_pageout_scan_active_throttled = 0;
427 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
428 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
429 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
430 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
431 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
432 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
433 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
434 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
435 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
436 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
437 unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
438 unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
439 unsigned int vm_page_speculative_count_drifts = 0;
440 unsigned int vm_page_speculative_count_drift_max = 0;
441
442 uint32_t vm_compressor_failed;
443
444 /*
445 * Backing store throttle when BS is exhausted
446 */
447 unsigned int vm_backing_store_low = 0;
448
449 unsigned int vm_pageout_out_of_line = 0;
450 unsigned int vm_pageout_in_place = 0;
451
452 unsigned int vm_page_steal_pageout_page = 0;
453
454 struct vm_config vm_config;
455
456 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
457 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
458
459 unsigned int vm_page_speculative_target = 0;
460
461 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
462
463 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
464
465 #if DEVELOPMENT || DEBUG
466 unsigned long vm_cs_validated_resets = 0;
467 #endif
468
469 int vm_debug_events = 0;
470
471 #if CONFIG_MEMORYSTATUS
472 #if !CONFIG_JETSAM
473 extern boolean_t memorystatus_idle_exit_from_VM(void);
474 #endif
475 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
476 extern void memorystatus_on_pageout_scan_end(void);
477
478 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
479 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
480 #if DEVELOPMENT || DEBUG
481 uint32_t vm_grab_anon_overrides = 0;
482 uint32_t vm_grab_anon_nops = 0;
483 #endif
484
485 #endif
486
487 #if MACH_CLUSTER_STATS
488 unsigned long vm_pageout_cluster_dirtied = 0;
489 unsigned long vm_pageout_cluster_cleaned = 0;
490 unsigned long vm_pageout_cluster_collisions = 0;
491 unsigned long vm_pageout_cluster_clusters = 0;
492 unsigned long vm_pageout_cluster_conversions = 0;
493 unsigned long vm_pageout_target_collisions = 0;
494 unsigned long vm_pageout_target_page_dirtied = 0;
495 unsigned long vm_pageout_target_page_freed = 0;
496 #define CLUSTER_STAT(clause) clause
497 #else /* MACH_CLUSTER_STATS */
498 #define CLUSTER_STAT(clause)
499 #endif /* MACH_CLUSTER_STATS */
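/*
 * CLUSTER_STAT() lets the cluster-statistics counters above be updated
 * without sprinkling #if MACH_CLUSTER_STATS throughout the code: a call such as
 *	CLUSTER_STAT(vm_pageout_target_page_freed++;)
 * expands to the increment when MACH_CLUSTER_STATS is configured and to
 * nothing otherwise.
 */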
500
501
502 #if DEVELOPMENT || DEBUG
503 vmct_stats_t vmct_stats;
504 #endif
505
506 /*
507 * Routine: vm_pageout_object_terminate
508 * Purpose:
509 * Destroy the pageout_object, and perform all of the
510 * required cleanup actions.
511 *
512 * In/Out conditions:
513 * The object must be locked, and will be returned locked.
514 */
515 void
516 vm_pageout_object_terminate(
517 vm_object_t object)
518 {
519 vm_object_t shadow_object;
520
521 /*
522 * Deal with the deallocation (last reference) of a pageout object
523 * (used for cleaning-in-place) by dropping the paging references/
524 * freeing pages in the original object.
525 */
526
527 assert(object->pageout);
528 shadow_object = object->shadow;
529 vm_object_lock(shadow_object);
530
531 while (!vm_page_queue_empty(&object->memq)) {
532 vm_page_t p, m;
533 vm_object_offset_t offset;
534
535 p = (vm_page_t) vm_page_queue_first(&object->memq);
536
537 assert(p->private);
538 assert(p->free_when_done);
539 p->free_when_done = FALSE;
540 assert(!p->cleaning);
541 assert(!p->laundry);
542
543 offset = p->offset;
544 VM_PAGE_FREE(p);
545 p = VM_PAGE_NULL;
546
547 m = vm_page_lookup(shadow_object,
548 offset + object->vo_shadow_offset);
549
550 if(m == VM_PAGE_NULL)
551 continue;
552
553 assert((m->dirty) || (m->precious) ||
554 (m->busy && m->cleaning));
555
556 /*
557 * Handle the trusted pager throttle.
558 * Also decrement the burst throttle (if external).
559 */
560 vm_page_lock_queues();
561 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
562 vm_pageout_throttle_up(m);
563
564 /*
565 * Handle the "target" page(s). These pages are to be freed if
566 * successfully cleaned. Target pages are always busy, and are
567 * wired exactly once. The initial target pages are not mapped,
568 * (so cannot be referenced or modified) but converted target
569 * pages may have been modified between the selection as an
570 * adjacent page and conversion to a target.
571 */
572 if (m->free_when_done) {
573 assert(m->busy);
574 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
575 assert(m->wire_count == 1);
576 m->cleaning = FALSE;
577 m->free_when_done = FALSE;
578 #if MACH_CLUSTER_STATS
579 if (m->wanted) vm_pageout_target_collisions++;
580 #endif
581 /*
582 * Revoke all access to the page. Since the object is
583 * locked, and the page is busy, this prevents the page
584 * from being dirtied after the pmap_disconnect() call
585 * returns.
586 *
587  * Since the page is left "dirty" but "not modified", we
588 * can detect whether the page was redirtied during
589 * pageout by checking the modify state.
590 */
591 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
592 SET_PAGE_DIRTY(m, FALSE);
593 } else {
594 m->dirty = FALSE;
595 }
596
597 if (m->dirty) {
598 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
599 vm_page_unwire(m, TRUE); /* reactivates */
600 VM_STAT_INCR(reactivations);
601 PAGE_WAKEUP_DONE(m);
602 } else {
603 CLUSTER_STAT(vm_pageout_target_page_freed++;)
604 vm_page_free(m);/* clears busy, etc. */
605 }
606 vm_page_unlock_queues();
607 continue;
608 }
609 /*
610 * Handle the "adjacent" pages. These pages were cleaned in
611 * place, and should be left alone.
612 * If prep_pin_count is nonzero, then someone is using the
613 * page, so make it active.
614 */
615 if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) && !m->private) {
616 if (m->reference)
617 vm_page_activate(m);
618 else
619 vm_page_deactivate(m);
620 }
621 if (m->overwriting) {
622 /*
623 * the (COPY_OUT_FROM == FALSE) request_page_list case
624 */
625 if (m->busy) {
626 /*
627 * We do not re-set m->dirty !
628 * The page was busy so no extraneous activity
629 * could have occurred. COPY_INTO is a read into the
630 * new pages. CLEAN_IN_PLACE does actually write
631 * out the pages but handling outside of this code
632 * will take care of resetting dirty. We clear the
633 * modify however for the Programmed I/O case.
634 */
635 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
636
637 m->busy = FALSE;
638 m->absent = FALSE;
639 } else {
640 /*
641 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
642 * Occurs when the original page was wired
643 * at the time of the list request
644 */
645 assert(VM_PAGE_WIRED(m));
646 vm_page_unwire(m, TRUE); /* reactivates */
647 }
648 m->overwriting = FALSE;
649 } else {
650 /*
651 * Set the dirty state according to whether or not the page was
652 * modified during the pageout. Note that we purposefully do
653 * NOT call pmap_clear_modify since the page is still mapped.
654  * If the page were to be dirtied between the 2 calls,
655 * this fact would be lost. This code is only necessary to
656 * maintain statistics, since the pmap module is always
657 * consulted if m->dirty is false.
658 */
659 #if MACH_CLUSTER_STATS
660 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
661
662 if (m->dirty) vm_pageout_cluster_dirtied++;
663 else vm_pageout_cluster_cleaned++;
664 if (m->wanted) vm_pageout_cluster_collisions++;
665 #else
666 m->dirty = FALSE;
667 #endif
668 }
669 m->cleaning = FALSE;
670
671 /*
672 * Wakeup any thread waiting for the page to be un-cleaning.
673 */
674 PAGE_WAKEUP(m);
675 vm_page_unlock_queues();
676 }
677 /*
678 * Account for the paging reference taken in vm_paging_object_allocate.
679 */
680 vm_object_activity_end(shadow_object);
681 vm_object_unlock(shadow_object);
682
683 assert(object->ref_count == 0);
684 assert(object->paging_in_progress == 0);
685 assert(object->activity_in_progress == 0);
686 assert(object->resident_page_count == 0);
687 return;
688 }
689
690 /*
691 * Routine: vm_pageclean_setup
692 *
693 * Purpose: setup a page to be cleaned (made non-dirty), but not
694 * necessarily flushed from the VM page cache.
695 * This is accomplished by cleaning in place.
696 *
697 * The page must not be busy, and new_object
698 * must be locked.
699 *
700 */
701 static void
702 vm_pageclean_setup(
703 vm_page_t m,
704 vm_page_t new_m,
705 vm_object_t new_object,
706 vm_object_offset_t new_offset)
707 {
708 assert(!m->busy);
709 #if 0
710 assert(!m->cleaning);
711 #endif
712
713 XPR(XPR_VM_PAGEOUT,
714 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
715 VM_PAGE_OBJECT(m), m->offset, m,
716 new_m, new_offset);
717
718 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
719
720 /*
721 * Mark original page as cleaning in place.
722 */
723 m->cleaning = TRUE;
724 SET_PAGE_DIRTY(m, FALSE);
725 m->precious = FALSE;
726
727 /*
728 * Convert the fictitious page to a private shadow of
729 * the real page.
730 */
731 assert(new_m->fictitious);
732 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
733 new_m->fictitious = FALSE;
734 new_m->private = TRUE;
735 new_m->free_when_done = TRUE;
736 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
737
738 vm_page_lockspin_queues();
739 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
740 vm_page_unlock_queues();
741
742 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
743 assert(!new_m->wanted);
744 new_m->busy = FALSE;
745 }
746
747 /*
748 * Routine: vm_pageout_initialize_page
749 * Purpose:
750 * Causes the specified page to be initialized in
751 * the appropriate memory object. This routine is used to push
752 * pages into a copy-object when they are modified in the
753 * permanent object.
754 *
755 * The page is moved to a temporary object and paged out.
756 *
757 * In/out conditions:
758 * The page in question must not be on any pageout queues.
759 * The object to which it belongs must be locked.
760 * The page must be busy, but not hold a paging reference.
761 *
762 * Implementation:
763 * Move this page to a completely new object.
764 */
765 void
766 vm_pageout_initialize_page(
767 vm_page_t m)
768 {
769 vm_object_t object;
770 vm_object_offset_t paging_offset;
771 memory_object_t pager;
772
773 XPR(XPR_VM_PAGEOUT,
774 "vm_pageout_initialize_page, page 0x%X\n",
775 m, 0, 0, 0, 0);
776
777 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
778
779 object = VM_PAGE_OBJECT(m);
780
781 assert(m->busy);
782 assert(object->internal);
783
784 /*
785 * Verify that we really want to clean this page
786 */
787 assert(!m->absent);
788 assert(!m->error);
789 assert(m->dirty);
790
791 /*
792 * Create a paging reference to let us play with the object.
793 */
794 paging_offset = m->offset + object->paging_offset;
795
796 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
797 panic("reservation without pageout?"); /* alan */
798
799 VM_PAGE_FREE(m);
800 vm_object_unlock(object);
801
802 return;
803 }
804
805 /*
806 * If there's no pager, then we can't clean the page. This should
807 * never happen since this should be a copy object and therefore not
808 * an external object, so the pager should always be there.
809 */
810
811 pager = object->pager;
812
813 if (pager == MEMORY_OBJECT_NULL) {
814 panic("missing pager for copy object");
815
816 VM_PAGE_FREE(m);
817 return;
818 }
819
820 /*
821 * set the page for future call to vm_fault_list_request
822 */
823 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
824 SET_PAGE_DIRTY(m, FALSE);
825
826 /*
827 * keep the object from collapsing or terminating
828 */
829 vm_object_paging_begin(object);
830 vm_object_unlock(object);
831
832 /*
833 * Write the data to its pager.
834 * Note that the data is passed by naming the new object,
835 * not a virtual address; the pager interface has been
836 * manipulated to use the "internal memory" data type.
837 * [The object reference from its allocation is donated
838 * to the eventual recipient.]
839 */
840 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
841
842 vm_object_lock(object);
843 vm_object_paging_end(object);
844 }
845
846 #if MACH_CLUSTER_STATS
847 #define MAXCLUSTERPAGES 16
848 struct {
849 unsigned long pages_in_cluster;
850 unsigned long pages_at_higher_offsets;
851 unsigned long pages_at_lower_offsets;
852 } cluster_stats[MAXCLUSTERPAGES];
853 #endif /* MACH_CLUSTER_STATS */
854
855
856 /*
857 * vm_pageout_cluster:
858 *
859 * Given a page, queue it to the appropriate I/O thread,
860 * which will page it out and attempt to clean adjacent pages
861 * in the same operation.
862 *
863 * The object and queues must be locked. We will take a
864 * paging reference to prevent deallocation or collapse when we
865 * release the object lock back at the call site. The I/O thread
866  * is responsible for consuming this reference.
867 *
868 * The page must not be on any pageout queue.
869 */
870 int32_t vmct_active = 0;
871 typedef enum vmct_state_t {
872 VMCT_IDLE,
873 VMCT_AWAKENED,
874 VMCT_ACTIVE,
875 } vmct_state_t;
876 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
877
878 void
879 vm_pageout_cluster(vm_page_t m)
880 {
881 vm_object_t object = VM_PAGE_OBJECT(m);
882 struct vm_pageout_queue *q;
883
884
885 XPR(XPR_VM_PAGEOUT,
886 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
887 object, m->offset, m, 0, 0);
888
889 VM_PAGE_CHECK(m);
890 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
891 vm_object_lock_assert_exclusive(object);
892
893 /*
894 * Only a certain kind of page is appreciated here.
895 */
896 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
897 assert(!m->cleaning && !m->laundry);
898 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
899
900 /*
901 * protect the object from collapse or termination
902 */
903 vm_object_activity_begin(object);
904
905 if (object->internal == TRUE) {
906 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
907
908 m->busy = TRUE;
909
910 q = &vm_pageout_queue_internal;
911 } else
912 q = &vm_pageout_queue_external;
913
914 /*
915 * pgo_laundry count is tied to the laundry bit
916 */
917 m->laundry = TRUE;
918 q->pgo_laundry++;
919
920 m->vm_page_q_state = VM_PAGE_ON_PAGEOUT_Q;
921 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
922
923 if (q->pgo_idle == TRUE) {
924 q->pgo_idle = FALSE;
925 thread_wakeup((event_t) &q->pgo_pending);
926 }
927 VM_PAGE_CHECK(m);
928 }
929
930
931 unsigned long vm_pageout_throttle_up_count = 0;
932
933 /*
934 * A page is back from laundry or we are stealing it back from
935 * the laundering state. See if there are some pages waiting to
936 * go to laundry and if we can let some of them go now.
937 *
938 * Object and page queues must be locked.
939 */
940 void
941 vm_pageout_throttle_up(
942 vm_page_t m)
943 {
944 struct vm_pageout_queue *q;
945 vm_object_t m_object;
946
947 m_object = VM_PAGE_OBJECT(m);
948
949 assert(m_object != VM_OBJECT_NULL);
950 assert(m_object != kernel_object);
951
952 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
953 vm_object_lock_assert_exclusive(m_object);
954
955 vm_pageout_throttle_up_count++;
956
957 if (m_object->internal == TRUE)
958 q = &vm_pageout_queue_internal;
959 else
960 q = &vm_pageout_queue_external;
961
962 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
963
964 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
965 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
966
967 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
968
969 vm_object_activity_end(m_object);
970 }
971 if (m->laundry == TRUE) {
972
973 m->laundry = FALSE;
974 q->pgo_laundry--;
975
976 if (q->pgo_throttled == TRUE) {
977 q->pgo_throttled = FALSE;
978 thread_wakeup((event_t) &q->pgo_laundry);
979 }
980 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
981 q->pgo_draining = FALSE;
982 thread_wakeup((event_t) (&q->pgo_laundry+1));
983 }
984 }
985 }
986
987
988 static void
989 vm_pageout_throttle_up_batch(
990 struct vm_pageout_queue *q,
991 int batch_cnt)
992 {
993 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
994
995 vm_pageout_throttle_up_count += batch_cnt;
996
997 q->pgo_laundry -= batch_cnt;
998
999 if (q->pgo_throttled == TRUE) {
1000 q->pgo_throttled = FALSE;
1001 thread_wakeup((event_t) &q->pgo_laundry);
1002 }
1003 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1004 q->pgo_draining = FALSE;
1005 thread_wakeup((event_t) (&q->pgo_laundry+1));
1006 }
1007 }
1008
1009
1010
1011 /*
1012 * VM memory pressure monitoring.
1013 *
1014 * vm_pageout_scan() keeps track of the number of pages it considers and
1015 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1016 *
1017 * compute_memory_pressure() is called every second from compute_averages()
1018 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1019  * of reclaimed pages in a new vm_pageout_stat[] bucket.
1020 *
1021 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1022 * The caller provides the number of seconds ("nsecs") worth of statistics
1023 * it wants, up to 30 seconds.
1024 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1025 * also returns the number of pages the system still needs to reclaim at this
1026 * moment in time.
1027 */
1028 #define VM_PAGEOUT_STAT_SIZE 31
1029 struct vm_pageout_stat {
1030 unsigned int considered;
1031 unsigned int reclaimed_clean;
1032 unsigned int pages_compressed;
1033 unsigned int pages_grabbed_by_compressor;
1034 unsigned int cleaned_dirty_external;
1035 unsigned int throttled_internal_q;
1036 unsigned int throttled_external_q;
1037 unsigned int failed_compressions;
1038 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0}, };
1039
1040 unsigned int vm_pageout_stat_now = 0;
1041 unsigned int vm_memory_pressure = 0;
1042
1043 #define VM_PAGEOUT_STAT_BEFORE(i) \
1044 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1045 #define VM_PAGEOUT_STAT_AFTER(i) \
1046 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
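/*
 * vm_pageout_stats[] is used as a ring of VM_PAGEOUT_STAT_SIZE (31) one-second
 * buckets, so BEFORE/AFTER simply step the index with wraparound, e.g.
 *	VM_PAGEOUT_STAT_BEFORE(0) == 30
 *	VM_PAGEOUT_STAT_AFTER(30) == 0
 * compute_memory_pressure() advances vm_pageout_stat_now once per second, and
 * mach_vm_pressure_monitor() walks backwards through up to 30 completed buckets.
 */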
1047
1048 #if VM_PAGE_BUCKETS_CHECK
1049 int vm_page_buckets_check_interval = 10; /* in seconds */
1050 #endif /* VM_PAGE_BUCKETS_CHECK */
1051
1052 /*
1053 * Called from compute_averages().
1054 */
1055 void
1056 compute_memory_pressure(
1057 __unused void *arg)
1058 {
1059 unsigned int vm_pageout_next;
1060
1061 #if VM_PAGE_BUCKETS_CHECK
1062 /* check the consistency of VM page buckets at regular interval */
1063 static int counter = 0;
1064 if ((++counter % vm_page_buckets_check_interval) == 0) {
1065 vm_page_buckets_check();
1066 }
1067 #endif /* VM_PAGE_BUCKETS_CHECK */
1068
1069 vm_memory_pressure =
1070 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed_clean;
1071
1072 commpage_set_memory_pressure( vm_memory_pressure );
1073
1074 /* move "now" forward */
1075 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1076 vm_pageout_stats[vm_pageout_next].considered = 0;
1077 vm_pageout_stats[vm_pageout_next].reclaimed_clean = 0;
1078 vm_pageout_stats[vm_pageout_next].throttled_internal_q = 0;
1079 vm_pageout_stats[vm_pageout_next].throttled_external_q = 0;
1080 vm_pageout_stats[vm_pageout_next].cleaned_dirty_external = 0;
1081 vm_pageout_stats[vm_pageout_next].pages_compressed = 0;
1082 vm_pageout_stats[vm_pageout_next].pages_grabbed_by_compressor = 0;
1083 vm_pageout_stats[vm_pageout_next].failed_compressions = 0;
1084
1085 vm_pageout_stat_now = vm_pageout_next;
1086 }
1087
1088
1089 /*
1090 * IMPORTANT
1091 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1092 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1093 * it must be safe in the restricted stackshot context. Locks and/or
1094 * blocking are not allowable.
1095 */
1096 unsigned int
1097 mach_vm_ctl_page_free_wanted(void)
1098 {
1099 unsigned int page_free_target, page_free_count, page_free_wanted;
1100
1101 page_free_target = vm_page_free_target;
1102 page_free_count = vm_page_free_count;
1103 if (page_free_target > page_free_count) {
1104 page_free_wanted = page_free_target - page_free_count;
1105 } else {
1106 page_free_wanted = 0;
1107 }
1108
1109 return page_free_wanted;
1110 }
1111
1112
1113 /*
1114 * IMPORTANT:
1115 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1116 * wait_for_pressure FALSE, so that code path must remain safe in the
1117 * restricted stackshot context. No blocking or locks are allowable.
1118 * on that code path.
1119 */
1120
1121 kern_return_t
1122 mach_vm_pressure_monitor(
1123 boolean_t wait_for_pressure,
1124 unsigned int nsecs_monitored,
1125 unsigned int *pages_reclaimed_p,
1126 unsigned int *pages_wanted_p)
1127 {
1128 wait_result_t wr;
1129 unsigned int vm_pageout_then, vm_pageout_now;
1130 unsigned int pages_reclaimed;
1131
1132 /*
1133 * We don't take the vm_page_queue_lock here because we don't want
1134 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1135 * thread when it's trying to reclaim memory. We don't need fully
1136 * accurate monitoring anyway...
1137 */
1138
1139 if (wait_for_pressure) {
1140 /* wait until there's memory pressure */
1141 while (vm_page_free_count >= vm_page_free_target) {
1142 wr = assert_wait((event_t) &vm_page_free_wanted,
1143 THREAD_INTERRUPTIBLE);
1144 if (wr == THREAD_WAITING) {
1145 wr = thread_block(THREAD_CONTINUE_NULL);
1146 }
1147 if (wr == THREAD_INTERRUPTED) {
1148 return KERN_ABORTED;
1149 }
1150 if (wr == THREAD_AWAKENED) {
1151 /*
1152 * The memory pressure might have already
1153 * been relieved but let's not block again
1154 * and let's report that there was memory
1155 * pressure at some point.
1156 */
1157 break;
1158 }
1159 }
1160 }
1161
1162 /* provide the number of pages the system wants to reclaim */
1163 if (pages_wanted_p != NULL) {
1164 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1165 }
1166
1167 if (pages_reclaimed_p == NULL) {
1168 return KERN_SUCCESS;
1169 }
1170
1171 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1172 vm_pageout_now = vm_pageout_stat_now;
1173 pages_reclaimed = 0;
1174 for (vm_pageout_then =
1175 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1176 vm_pageout_then != vm_pageout_now &&
1177 nsecs_monitored-- != 0;
1178 vm_pageout_then =
1179 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1180 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed_clean;
1181 }
1182 *pages_reclaimed_p = pages_reclaimed;
1183
1184 return KERN_SUCCESS;
1185 }
1186
1187
1188
1189 #if DEVELOPMENT || DEBUG
1190
1191 static void
1192 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1193
1194 /*
1195 * condition variable used to make sure there is
1196 * only a single sweep going on at a time
1197 */
1198 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1199
1200
1201 void
1202 vm_pageout_disconnect_all_pages()
1203 {
1204 vm_page_lock_queues();
1205
1206 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1207 vm_page_unlock_queues();
1208 return;
1209 }
1210 vm_pageout_disconnect_all_pages_active = TRUE;
1211 vm_page_unlock_queues();
1212
1213 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217 vm_pageout_disconnect_all_pages_active = FALSE;
1218 }
1219
1220
1221 void
1222 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1223 {
1224 vm_page_t m;
1225 vm_object_t t_object = NULL;
1226 vm_object_t l_object = NULL;
1227 vm_object_t m_object = NULL;
1228 int delayed_unlock = 0;
1229 int try_failed_count = 0;
1230 int disconnected_count = 0;
1231 int paused_count = 0;
1232 int object_locked_count = 0;
1233
1234 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1235 q, qcount, 0, 0, 0);
1236
1237 vm_page_lock_queues();
1238
1239 while (qcount && !vm_page_queue_empty(q)) {
1240
1241 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1242
1243 m = (vm_page_t) vm_page_queue_first(q);
1244 m_object = VM_PAGE_OBJECT(m);
1245
1246 /*
1247 * check to see if we currently are working
1248 * with the same object... if so, we've
1249 * already got the lock
1250 */
1251 if (m_object != l_object) {
1252 /*
1253 * the object associated with candidate page is
1254 * different from the one we were just working
1255 * with... dump the lock if we still own it
1256 */
1257 if (l_object != NULL) {
1258 vm_object_unlock(l_object);
1259 l_object = NULL;
1260 }
1261 if (m_object != t_object)
1262 try_failed_count = 0;
1263
1264 /*
1265  * Try to lock object; since we've already got the
1266 * page queues lock, we can only 'try' for this one.
1267 * if the 'try' fails, we need to do a mutex_pause
1268 * to allow the owner of the object lock a chance to
1269 * run...
1270 */
1271 if ( !vm_object_lock_try_scan(m_object)) {
1272
1273 if (try_failed_count > 20) {
1274 goto reenter_pg_on_q;
1275 }
1276 vm_page_unlock_queues();
1277 mutex_pause(try_failed_count++);
1278 vm_page_lock_queues();
1279 delayed_unlock = 0;
1280
1281 paused_count++;
1282
1283 t_object = m_object;
1284 continue;
1285 }
1286 object_locked_count++;
1287
1288 l_object = m_object;
1289 }
1290 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1291 /*
1292 * put it back on the head of its queue
1293 */
1294 goto reenter_pg_on_q;
1295 }
1296 if (m->pmapped == TRUE) {
1297
1298 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1299
1300 disconnected_count++;
1301 }
1302 reenter_pg_on_q:
1303 vm_page_queue_remove(q, m, vm_page_t, pageq);
1304 vm_page_queue_enter(q, m, vm_page_t, pageq);
1305
1306 qcount--;
1307 try_failed_count = 0;
1308
1309 if (delayed_unlock++ > 128) {
1310
1311 if (l_object != NULL) {
1312 vm_object_unlock(l_object);
1313 l_object = NULL;
1314 }
1315 lck_mtx_yield(&vm_page_queue_lock);
1316 delayed_unlock = 0;
1317 }
1318 }
1319 if (l_object != NULL) {
1320 vm_object_unlock(l_object);
1321 l_object = NULL;
1322 }
1323 vm_page_unlock_queues();
1324
1325 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1326 q, disconnected_count, object_locked_count, paused_count, 0);
1327 }
1328
1329 #endif
1330
1331
1332 static void
1333 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1334
1335 /*
1336 * condition variable used to make sure there is
1337 * only a single sweep going on at a time
1338 */
1339 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1340
1341
1342 void
1343 vm_pageout_anonymous_pages()
1344 {
1345 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1346
1347 vm_page_lock_queues();
1348
1349 if (vm_pageout_anonymous_pages_active == TRUE) {
1350 vm_page_unlock_queues();
1351 return;
1352 }
1353 vm_pageout_anonymous_pages_active = TRUE;
1354 vm_page_unlock_queues();
1355
1356 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1357 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1358 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1359
1360 if (VM_CONFIG_SWAP_IS_PRESENT)
1361 vm_consider_swapping();
1362
1363 vm_page_lock_queues();
1364 vm_pageout_anonymous_pages_active = FALSE;
1365 vm_page_unlock_queues();
1366 }
1367 }
1368
1369
1370 void
1371 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1372 {
1373 vm_page_t m;
1374 vm_object_t t_object = NULL;
1375 vm_object_t l_object = NULL;
1376 vm_object_t m_object = NULL;
1377 int delayed_unlock = 0;
1378 int try_failed_count = 0;
1379 int refmod_state;
1380 int pmap_options;
1381 struct vm_pageout_queue *iq;
1382 ppnum_t phys_page;
1383
1384
1385 iq = &vm_pageout_queue_internal;
1386
1387 vm_page_lock_queues();
1388
1389 while (qcount && !vm_page_queue_empty(q)) {
1390
1391 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1392
1393 if (VM_PAGE_Q_THROTTLED(iq)) {
1394
1395 if (l_object != NULL) {
1396 vm_object_unlock(l_object);
1397 l_object = NULL;
1398 }
1399 iq->pgo_draining = TRUE;
1400
1401 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1402 vm_page_unlock_queues();
1403
1404 thread_block(THREAD_CONTINUE_NULL);
1405
1406 vm_page_lock_queues();
1407 delayed_unlock = 0;
1408 continue;
1409 }
1410 m = (vm_page_t) vm_page_queue_first(q);
1411 m_object = VM_PAGE_OBJECT(m);
1412
1413 /*
1414 * check to see if we currently are working
1415 * with the same object... if so, we've
1416 * already got the lock
1417 */
1418 if (m_object != l_object) {
1419 if ( !m_object->internal)
1420 goto reenter_pg_on_q;
1421
1422 /*
1423 * the object associated with candidate page is
1424 * different from the one we were just working
1425 * with... dump the lock if we still own it
1426 */
1427 if (l_object != NULL) {
1428 vm_object_unlock(l_object);
1429 l_object = NULL;
1430 }
1431 if (m_object != t_object)
1432 try_failed_count = 0;
1433
1434 /*
1435  * Try to lock object; since we've already got the
1436 * page queues lock, we can only 'try' for this one.
1437 * if the 'try' fails, we need to do a mutex_pause
1438 * to allow the owner of the object lock a chance to
1439 * run...
1440 */
1441 if ( !vm_object_lock_try_scan(m_object)) {
1442
1443 if (try_failed_count > 20) {
1444 goto reenter_pg_on_q;
1445 }
1446 vm_page_unlock_queues();
1447 mutex_pause(try_failed_count++);
1448 vm_page_lock_queues();
1449 delayed_unlock = 0;
1450
1451 t_object = m_object;
1452 continue;
1453 }
1454 l_object = m_object;
1455 }
1456 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1457 /*
1458 * page is not to be cleaned
1459 * put it back on the head of its queue
1460 */
1461 goto reenter_pg_on_q;
1462 }
1463 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1464
1465 if (m->reference == FALSE && m->pmapped == TRUE) {
1466 refmod_state = pmap_get_refmod(phys_page);
1467
1468 if (refmod_state & VM_MEM_REFERENCED)
1469 m->reference = TRUE;
1470 if (refmod_state & VM_MEM_MODIFIED) {
1471 SET_PAGE_DIRTY(m, FALSE);
1472 }
1473 }
1474 if (m->reference == TRUE) {
1475 m->reference = FALSE;
1476 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1477 goto reenter_pg_on_q;
1478 }
1479 if (m->pmapped == TRUE) {
1480 if (m->dirty || m->precious) {
1481 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1482 } else {
1483 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1484 }
1485 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1486 if (refmod_state & VM_MEM_MODIFIED) {
1487 SET_PAGE_DIRTY(m, FALSE);
1488 }
1489 }
1490 if ( !m->dirty && !m->precious) {
1491 vm_page_unlock_queues();
1492 VM_PAGE_FREE(m);
1493 vm_page_lock_queues();
1494 delayed_unlock = 0;
1495
1496 goto next_pg;
1497 }
1498 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1499
1500 if (!m_object->pager_initialized) {
1501
1502 vm_page_unlock_queues();
1503
1504 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1505
1506 if (!m_object->pager_initialized)
1507 vm_object_compressor_pager_create(m_object);
1508
1509 vm_page_lock_queues();
1510 delayed_unlock = 0;
1511 }
1512 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1513 goto reenter_pg_on_q;
1514 /*
1515 * vm_object_compressor_pager_create will drop the object lock
1516 * which means 'm' may no longer be valid to use
1517 */
1518 continue;
1519 }
1520 /*
1521 * we've already factored out pages in the laundry which
1522 * means this page can't be on the pageout queue so it's
1523 * safe to do the vm_page_queues_remove
1524 */
1525 vm_page_queues_remove(m, TRUE);
1526
1527 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1528
1529 vm_pageout_cluster(m);
1530
1531 goto next_pg;
1532
1533 reenter_pg_on_q:
1534 vm_page_queue_remove(q, m, vm_page_t, pageq);
1535 vm_page_queue_enter(q, m, vm_page_t, pageq);
1536 next_pg:
1537 qcount--;
1538 try_failed_count = 0;
1539
1540 if (delayed_unlock++ > 128) {
1541
1542 if (l_object != NULL) {
1543 vm_object_unlock(l_object);
1544 l_object = NULL;
1545 }
1546 lck_mtx_yield(&vm_page_queue_lock);
1547 delayed_unlock = 0;
1548 }
1549 }
1550 if (l_object != NULL) {
1551 vm_object_unlock(l_object);
1552 l_object = NULL;
1553 }
1554 vm_page_unlock_queues();
1555 }
1556
1557
1558
1559 /*
1560 * function in BSD to apply I/O throttle to the pageout thread
1561 */
1562 extern void vm_pageout_io_throttle(void);
1563
1564 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1565 MACRO_BEGIN \
1566 /* \
1567 * If a "reusable" page somehow made it back into \
1568 * the active queue, it's been re-used and is not \
1569 * quite re-usable. \
1570 * If the VM object was "all_reusable", consider it \
1571 * as "all re-used" instead of converting it to \
1572 * "partially re-used", which could be expensive. \
1573 */ \
1574 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1575 if ((m)->reusable || \
1576 (obj)->all_reusable) { \
1577 vm_object_reuse_pages((obj), \
1578 (m)->offset, \
1579 (m)->offset + PAGE_SIZE_64, \
1580 FALSE); \
1581 } \
1582 MACRO_END
1583
1584
1585 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1586 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1587
1588 #define FCS_IDLE 0
1589 #define FCS_DELAYED 1
1590 #define FCS_DEADLOCK_DETECTED 2
1591
1592 struct flow_control {
1593 int state;
1594 mach_timespec_t ts;
1595 };
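/*
 * flow_control tracks vm_pageout_scan()'s throttling state when the pageout
 * queues back up: FCS_IDLE (no wait in progress), FCS_DELAYED (waiting, with
 * "ts" holding the deadline), and FCS_DEADLOCK_DETECTED (the wait ran past
 * VM_PAGEOUT_DEADLOCK_WAIT, so the scan falls back to the
 * VM_PAGEOUT_DEADLOCK_RELIEF path to get pages moving again).
 */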
1596
1597 #if CONFIG_BACKGROUND_QUEUE
1598 uint64_t vm_pageout_skipped_bq_internal = 0;
1599 uint64_t vm_pageout_considered_bq_internal = 0;
1600 uint64_t vm_pageout_considered_bq_external = 0;
1601 uint64_t vm_pageout_rejected_bq_internal = 0;
1602 uint64_t vm_pageout_rejected_bq_external = 0;
1603 #endif
1604
1605 uint32_t vm_pageout_no_victim = 0;
1606 uint32_t vm_pageout_considered_page = 0;
1607 uint32_t vm_page_filecache_min = 0;
1608
1609 #define ANONS_GRABBED_LIMIT 2
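/*
 * ANONS_GRABBED_LIMIT caps how many anonymous (internal) pages
 * vm_pageout_scan() will take consecutively before it reconsiders
 * file-backed pages, so the anonymous and file-backed inactive queues
 * are drained in an interleaved fashion.
 */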
1610
1611 #if CONFIG_SECLUDED_MEMORY
1612 extern vm_page_t vm_page_grab_secluded(void);
1613 uint64_t vm_pageout_secluded_burst_count = 0;
1614 #endif /* CONFIG_SECLUDED_MEMORY */
1615
1616
1617 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1618 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1619
1620 #define VM_PAGEOUT_PB_NO_ACTION 0
1621 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1622 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1623
1624
1625 static void
1626 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1627 {
1628 if (*local_freeq) {
1629 vm_page_unlock_queues();
1630
1631 VM_DEBUG_EVENT(
1632 vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1633 vm_page_free_count, *local_freed, 0, 1);
1634
1635 vm_page_free_list(*local_freeq, TRUE);
1636
1637 VM_DEBUG_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1638 vm_page_free_count, 0, 0, 1);
1639
1640 *local_freeq = NULL;
1641 *local_freed = 0;
1642
1643 vm_page_lock_queues();
1644 } else {
1645 lck_mtx_yield(&vm_page_queue_lock);
1646 }
1647 *delayed_unlock = 1;
1648 }
1649
1650
1651 static void
1652 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1653 vm_page_t *local_freeq, int *local_freed, int action)
1654 {
1655 vm_page_unlock_queues();
1656
1657 if (*object != NULL) {
1658 vm_object_unlock(*object);
1659 *object = NULL;
1660 }
1661 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1662
1663 if (*local_freeq) {
1664
1665 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1666 vm_page_free_count, *local_freed, 0, 2);
1667
1668 vm_page_free_list(*local_freeq, TRUE);
1669
1670 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1671 vm_page_free_count, 0, 0, 2);
1672
1673 *local_freeq = NULL;
1674 *local_freed = 0;
1675 }
1676 *delayed_unlock = 1;
1677
1678 switch (action) {
1679
1680 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1681 vm_consider_waking_compactor_swapper();
1682 break;
1683 case VM_PAGEOUT_PB_THREAD_YIELD:
1684 thread_yield_internal(1);
1685 break;
1686 case VM_PAGEOUT_PB_NO_ACTION:
1687 default:
1688 break;
1689 }
1690 vm_page_lock_queues();
1691 }
1692
1693
1694 int last_vm_pageout_freed_from_inactive_clean = 0;
1695 int last_vm_pageout_freed_from_cleaned = 0;
1696 int last_vm_pageout_freed_from_speculative = 0;
1697 int last_vm_pageout_freed_after_compression = 0;
1698 int last_vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
1699 int last_vm_pageout_inactive_force_reclaim = 0;
1700 int last_vm_pageout_scan_inactive_throttled_external = 0;
1701 int last_vm_pageout_scan_inactive_throttled_internal = 0;
1702 int last_vm_pageout_reactivation_limit_exceeded = 0;
1703 int last_vm_pageout_considered_page = 0;
1704 int last_vm_compressor_pages_grabbed = 0;
1705 int last_vm_compressor_failed = 0;
1706 int last_vm_pageout_skipped_external = 0;
1707
1708
1709 void update_vm_info(void)
1710 {
1711 int tmp1, tmp2, tmp3, tmp4;
1712
1713 if (!kdebug_enable)
1714 return;
1715
1716 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1717 vm_page_active_count,
1718 vm_page_speculative_count,
1719 vm_page_inactive_count,
1720 vm_page_anonymous_count,
1721 0);
1722
1723 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1724 vm_page_free_count,
1725 vm_page_wire_count,
1726 VM_PAGE_COMPRESSOR_COUNT,
1727 0, 0);
1728
1729 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1730 c_segment_pages_compressed,
1731 vm_page_internal_count,
1732 vm_page_external_count,
1733 vm_page_xpmapped_external_count,
1734 0);
1735
1736
1737 if ((vm_pageout_considered_page - last_vm_pageout_considered_page) == 0 &&
1738 (vm_pageout_enqueued_cleaned_from_inactive_dirty - last_vm_pageout_enqueued_cleaned_from_inactive_dirty == 0) &&
1739 (vm_pageout_freed_after_compression - last_vm_pageout_freed_after_compression == 0))
1740 return;
1741
1742
1743 tmp1 = vm_pageout_considered_page;
1744 tmp2 = vm_pageout_freed_from_speculative;
1745 tmp3 = vm_pageout_freed_from_inactive_clean;
1746
1747 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1748 tmp1 - last_vm_pageout_considered_page,
1749 tmp2 - last_vm_pageout_freed_from_speculative,
1750 tmp3 - last_vm_pageout_freed_from_inactive_clean,
1751 0, 0);
1752
1753 last_vm_pageout_considered_page = tmp1;
1754 last_vm_pageout_freed_from_speculative = tmp2;
1755 last_vm_pageout_freed_from_inactive_clean = tmp3;
1756
1757
1758 tmp1 = vm_pageout_scan_inactive_throttled_external;
1759 tmp2 = vm_pageout_enqueued_cleaned_from_inactive_dirty;
1760 tmp3 = vm_pageout_freed_from_cleaned;
1761
1762 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1763 tmp1 - last_vm_pageout_scan_inactive_throttled_external,
1764 tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty,
1765 tmp3 - last_vm_pageout_freed_from_cleaned,
1766 0, 0);
1767
1768 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_external);
1769 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external += (tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty);
1770
1771 last_vm_pageout_scan_inactive_throttled_external = tmp1;
1772 last_vm_pageout_enqueued_cleaned_from_inactive_dirty = tmp2;
1773 last_vm_pageout_freed_from_cleaned = tmp3;
1774
1775
1776 tmp1 = vm_pageout_scan_inactive_throttled_internal;
1777 tmp2 = vm_pageout_freed_after_compression;
1778 tmp3 = vm_compressor_pages_grabbed;
1779 tmp4 = vm_pageout_skipped_external;
1780
1781 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1782 tmp1 - last_vm_pageout_scan_inactive_throttled_internal,
1783 tmp2 - last_vm_pageout_freed_after_compression,
1784 tmp3 - last_vm_compressor_pages_grabbed,
1785 tmp4 - last_vm_pageout_skipped_external,
1786 0);
1787
1788 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_internal);
1789 vm_pageout_stats[vm_pageout_stat_now].pages_compressed += (tmp2 - last_vm_pageout_freed_after_compression);
1790 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor += (tmp3 - last_vm_compressor_pages_grabbed);
1791
1792 last_vm_pageout_scan_inactive_throttled_internal = tmp1;
1793 last_vm_pageout_freed_after_compression = tmp2;
1794 last_vm_compressor_pages_grabbed = tmp3;
1795 last_vm_pageout_skipped_external = tmp4;
1796
1797
1798 if ((vm_pageout_reactivation_limit_exceeded - last_vm_pageout_reactivation_limit_exceeded) == 0 &&
1799 (vm_pageout_inactive_force_reclaim - last_vm_pageout_inactive_force_reclaim) == 0 &&
1800 (vm_compressor_failed - last_vm_compressor_failed) == 0)
1801 return;
1802
1803 tmp1 = vm_pageout_reactivation_limit_exceeded;
1804 tmp2 = vm_pageout_inactive_force_reclaim;
1805 tmp3 = vm_compressor_failed;
1806
1807 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1808 tmp1 - last_vm_pageout_reactivation_limit_exceeded,
1809 tmp2 - last_vm_pageout_inactive_force_reclaim,
1810 tmp3 - last_vm_compressor_failed,
1811 0, 0);
1812
1813 vm_pageout_stats[vm_pageout_stat_now].failed_compressions += (tmp3 - last_vm_compressor_failed);
1814
1815 last_vm_pageout_reactivation_limit_exceeded = tmp1;
1816 last_vm_pageout_inactive_force_reclaim = tmp2;
1817 last_vm_compressor_failed = tmp3;
1818 }
1819
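/*
 * A minimal sketch (comment only, not compiled) of the snapshot/delta
 * pattern update_vm_info() applies to every counter it reports; all of
 * the names below are the ones defined in this file:
 *
 *	tmp1 = vm_pageout_considered_page;            sample the running counter
 *	KERNEL_DEBUG_CONSTANT(...,
 *	    tmp1 - last_vm_pageout_considered_page,   emit only the delta since
 *	    ...);                                     the previous invocation
 *	last_vm_pageout_considered_page = tmp1;       remember the sample
 *
 * The two early returns in update_vm_info() skip the VM_INFO4..VM_INFO7
 * and the VM_INFO7 tracepoints, respectively, when their watched counters
 * have not moved since the last call.
 */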
1820
1821 /*
1822 * vm_pageout_scan does the dirty work for the pageout daemon.
1823 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1824 * held and vm_page_free_wanted == 0.
1825 */
1826 void
1827 vm_pageout_scan(void)
1828 {
1829 unsigned int loop_count = 0;
1830 unsigned int inactive_burst_count = 0;
1831 unsigned int active_burst_count = 0;
1832 unsigned int reactivated_this_call;
1833 unsigned int reactivate_limit;
1834 vm_page_t local_freeq = NULL;
1835 int local_freed = 0;
1836 int delayed_unlock;
1837 int delayed_unlock_limit = 0;
1838 int refmod_state = 0;
1839 int vm_pageout_deadlock_target = 0;
1840 struct vm_pageout_queue *iq;
1841 struct vm_pageout_queue *eq;
1842 struct vm_speculative_age_q *sq;
1843 struct flow_control flow_control = { 0, { 0, 0 } };
1844 boolean_t inactive_throttled = FALSE;
1845 boolean_t try_failed;
1846 mach_timespec_t ts;
1847 unsigned int msecs = 0;
1848 vm_object_t object = NULL;
1849 uint32_t inactive_reclaim_run;
1850 boolean_t exceeded_burst_throttle;
1851 boolean_t grab_anonymous = FALSE;
1852 boolean_t force_anonymous = FALSE;
1853 boolean_t force_speculative_aging = FALSE;
1854 int anons_grabbed = 0;
1855 int page_prev_q_state = 0;
1856 #if CONFIG_BACKGROUND_QUEUE
1857 boolean_t page_from_bg_q = FALSE;
1858 #endif
1859 int cache_evict_throttle = 0;
1860 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1861 int force_purge = 0;
1862 #define DELAY_SPECULATIVE_AGE 1000
1863 int delay_speculative_age = 0;
1864 vm_object_t m_object = VM_OBJECT_NULL;
1865
1866 #if VM_PRESSURE_EVENTS
1867 vm_pressure_level_t pressure_level;
1868 #endif /* VM_PRESSURE_EVENTS */
1869
1870 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1871 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1872 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1873
1874 flow_control.state = FCS_IDLE;
1875 iq = &vm_pageout_queue_internal;
1876 eq = &vm_pageout_queue_external;
1877 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1878
1879
1880 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1881
1882 /* Ask the pmap layer to return any pages it no longer needs. */
1883 pmap_release_pages_fast();
1884
1885 vm_page_lock_queues();
1886 delayed_unlock = 1;
1887
1888 /*
1889 * Calculate the max number of referenced pages on the inactive
1890 * queue that we will reactivate.
1891 */
1892 reactivated_this_call = 0;
1893 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1894 vm_page_inactive_count);
1895 inactive_reclaim_run = 0;
1896
1897 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1898
1899 /*
1900 * We want to gradually dribble pages from the active queue
1901 * to the inactive queue. If we let the inactive queue get
1902 * very small, and then suddenly dump many pages into it,
1903 * those pages won't get a sufficient chance to be referenced
1904 * before we start taking them from the inactive queue.
1905 *
1906 * We must limit the rate at which we send pages to the pagers
1907 * so that we don't tie up too many pages in the I/O queues.
1908 * We implement a throttling mechanism using the laundry count
1909 * to limit the number of pages outstanding to the default
1910 * and external pagers. We can bypass the throttles and look
1911 * for clean pages if the pageout queues don't drain in a timely
1912 * fashion since this may indicate that the pageout paths are
1913 * stalled waiting for memory, which only we can provide.
1914 */
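	/*
	 * A sketch of the throttle test applied below; the real check is the
	 * VM_PAGE_Q_THROTTLED() macro (defined in the pageout headers), which
	 * boils down to comparing a queue's outstanding laundry against its
	 * limit, roughly:
	 *
	 *	if (iq->pgo_laundry >= iq->pgo_maxlaundry)
	 *		... the internal (compressor) queue is throttled, so
	 *		    enter the flow-control path instead of pushing
	 *		    more dirty internal pages at it ...
	 *
	 * pgo_laundry and pgo_maxlaundry are the same fields reported by the
	 * VM_PAGEOUT_THREAD_BLOCK tracepoints further down in this routine.
	 */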
1915
1916
1917 Restart:
1918
1919 assert(object == NULL);
1920 assert(delayed_unlock != 0);
1921
1922 /*
1923 * Recalculate vm_page_inactive_target.
1924 */
1925 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1926 vm_page_inactive_count +
1927 vm_page_speculative_count);
1928
1929 vm_page_anonymous_min = vm_page_inactive_target / 20;
1930
1931
1932 /*
1933 * don't want to wake the pageout_scan thread up every time we fall below
1934 * the targets... set a low water mark at 0.25% below the target
1935 */
1936 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
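	/*
	 * For example, with vm_page_inactive_target at 100000 pages the line
	 * above yields vm_page_inactive_min == 100000 - 250 == 99750, i.e.
	 * 0.25% of slack before pageout_scan needs to be woken again.
	 */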
1937
1938 if (vm_page_speculative_percentage > 50)
1939 vm_page_speculative_percentage = 50;
1940 else if (vm_page_speculative_percentage <= 0)
1941 vm_page_speculative_percentage = 1;
1942
1943 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1944 vm_page_inactive_count);
1945
1946 try_failed = FALSE;
1947
1948 for (;;) {
1949 vm_page_t m;
1950
1951 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1952
1953 if (vm_upl_wait_for_pages < 0)
1954 vm_upl_wait_for_pages = 0;
1955
1956 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1957
1958 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1959 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1960
1961 #if CONFIG_SECLUDED_MEMORY
1962 /*
1963 * Deal with secluded_q overflow.
1964 */
1965 if (vm_page_secluded_count > vm_page_secluded_target) {
1966 unsigned int secluded_overflow;
1967 vm_page_t secluded_page;
1968
1969 if (object != NULL) {
1970 vm_object_unlock(object);
1971 object = NULL;
1972 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1973 }
1974 /*
1975 * SECLUDED_AGING_BEFORE_ACTIVE:
1976 * Excess secluded pages go to the active queue and
1977 * will later go to the inactive queue.
1978 */
1979 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1980 vm_page_secluded_count_inuse);
1981 secluded_overflow = (vm_page_secluded_count -
1982 vm_page_secluded_target);
1983 while (secluded_overflow-- > 0 &&
1984 vm_page_secluded_count > vm_page_secluded_target) {
1985 assert((vm_page_secluded_count_free +
1986 vm_page_secluded_count_inuse) ==
1987 vm_page_secluded_count);
1988 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1989 assert(secluded_page->vm_page_q_state ==
1990 VM_PAGE_ON_SECLUDED_Q);
1991 vm_page_queues_remove(secluded_page, FALSE);
1992 assert(!secluded_page->fictitious);
1993 assert(!VM_PAGE_WIRED(secluded_page));
1994 if (secluded_page->vm_page_object == 0) {
1995 /* transfer to free queue */
1996 assert(secluded_page->busy);
1997 secluded_page->snext = local_freeq;
1998 local_freeq = secluded_page;
1999 local_freed++;
2000 } else {
2001 /* transfer to head of active queue */
2002 vm_page_enqueue_active(secluded_page, FALSE);
2003 if (active_burst_count-- == 0) {
2004 vm_pageout_secluded_burst_count++;
2005 break;
2006 }
2007 }
2008 secluded_page = VM_PAGE_NULL;
2009
2010 if (delayed_unlock++ > delayed_unlock_limit) {
2011 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2012 }
2013 }
2014 }
2015 #endif /* CONFIG_SECLUDED_MEMORY */
2016
2017 assert(delayed_unlock);
2018
2019 /*
2020 * Move pages from active to inactive if we're below the target
2021 */
2022 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
2023 goto done_moving_active_pages;
2024
2025 if (object != NULL) {
2026 vm_object_unlock(object);
2027 object = NULL;
2028 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2029 }
2030 /*
2031 * Don't sweep through active queue more than the throttle
2032 * which should be kept relatively low
2033 */
2034 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
2035
2036 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
2037 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
2038
2039 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
2040 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2041 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2042 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
2043
2044
2045 while (!vm_page_queue_empty(&vm_page_queue_active) && active_burst_count--) {
2046
2047 vm_pageout_active++;
2048
2049 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
2050
2051 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
2052 assert(!m->laundry);
2053 assert(VM_PAGE_OBJECT(m) != kernel_object);
2054 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2055
2056 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2057
2058 /*
2059 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
2060 *
2061 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
2062 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
2063 * new reference happens. If no further references happen on the page after that remote TLB flushes
2064 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
2065 * by pageout_scan, which is just fine since the last reference would have happened quite far
2066 * in the past (TLB caches don't hang around for very long), and of course could just as easily
2067 * have happened before we moved the page
2068 */
2069 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2070
2071 /*
2072 * The page might be absent or busy,
2073 * but vm_page_deactivate can handle that.
2074 * FALSE indicates that we don't want a H/W clear reference
2075 */
2076 vm_page_deactivate_internal(m, FALSE);
2077
2078 if (delayed_unlock++ > delayed_unlock_limit) {
2079 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2080 }
2081 }
2082
2083 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
2084 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
2085 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
2086
2087 /**********************************************************************
2088 * above this point we're playing with the active and secluded queues
2089 * below this point we're playing with the throttling mechanisms
2090 * and the inactive queue
2091 **********************************************************************/
2092
2093 done_moving_active_pages:
2094
2095 if (vm_page_free_count + local_freed >= vm_page_free_target)
2096 {
2097 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2098 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2099 /*
2100 * make sure the pageout I/O threads are running
2101 * throttled in case there are still requests
2102 * in the laundry... since we have met our targets
2103 * we don't need the laundry to be cleaned in a timely
2104 * fashion... so let's avoid interfering with foreground
2105 * activity
2106 */
2107 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2108
2109 /*
2110 * recalculate vm_page_inactive_target
2111 */
2112 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2113 vm_page_inactive_count +
2114 vm_page_speculative_count);
2115 #ifndef CONFIG_EMBEDDED
2116 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
2117 !vm_page_queue_empty(&vm_page_queue_active)) {
2118 /*
2119 * inactive target still not met... keep going
2120 * until we get the queues balanced...
2121 */
2122 continue;
2123 }
2124 #endif
2125 lck_mtx_lock(&vm_page_queue_free_lock);
2126
2127 if ((vm_page_free_count >= vm_page_free_target) &&
2128 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2129 /*
2130 * done - we have met our target *and*
2131 * there is no one waiting for a page.
2132 */
2133 return_from_scan:
2134 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2135
2136 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2137 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
2138 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2139 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2140 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2141
2142 return;
2143 }
2144 lck_mtx_unlock(&vm_page_queue_free_lock);
2145 }
2146
2147 /*
2148 * Before anything, we check if we have any ripe volatile
2149 * objects around. If so, try to purge the first object.
2150 * If the purge fails, fall through to reclaim a page instead.
2151 * If the purge succeeds, go back to the top and re-evaluate
2152 * the new memory situation.
2153 */
2154
2155 assert(available_for_purge >= 0);
2156 force_purge = 0; /* no force-purging */
2157
2158 #if VM_PRESSURE_EVENTS
2159 pressure_level = memorystatus_vm_pressure_level;
2160
2161 if (pressure_level > kVMPressureNormal) {
2162
2163 if (pressure_level >= kVMPressureCritical) {
2164 force_purge = memorystatus_purge_on_critical;
2165 } else if (pressure_level >= kVMPressureUrgent) {
2166 force_purge = memorystatus_purge_on_urgent;
2167 } else if (pressure_level >= kVMPressureWarning) {
2168 force_purge = memorystatus_purge_on_warning;
2169 }
2170 }
2171 #endif /* VM_PRESSURE_EVENTS */
2172
2173 if (available_for_purge || force_purge) {
2174
2175 if (object != NULL) {
2176 vm_object_unlock(object);
2177 object = NULL;
2178 }
2179
2180 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2181
2182 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2183 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2184 vm_pageout_purged_objects++;
2185 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2186 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2187 continue;
2188 }
2189 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2190 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2191 }
2192
2193 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2194 /*
2195 * try to pull pages from the aging bins...
2196 * see vm_page.h for an explanation of how
2197 * this mechanism works
2198 */
2199 struct vm_speculative_age_q *aq;
2200 boolean_t can_steal = FALSE;
2201 int num_scanned_queues;
2202
2203 aq = &vm_page_queue_speculative[speculative_steal_index];
2204
2205 num_scanned_queues = 0;
2206 while (vm_page_queue_empty(&aq->age_q) &&
2207 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2208
2209 speculative_steal_index++;
2210
2211 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2212 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2213
2214 aq = &vm_page_queue_speculative[speculative_steal_index];
2215 }
2216
2217 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2218 /*
2219 * XXX We've scanned all the speculative
2220 * queues but still haven't found one
2221 * that is not empty, even though
2222 * vm_page_speculative_count is not 0.
2223 *
2224 * report the anomaly...
2225 */
2226 printf("vm_pageout_scan: "
2227 "all speculative queues empty "
2228 "but count=%d. Re-adjusting.\n",
2229 vm_page_speculative_count);
2230 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
2231 vm_page_speculative_count_drift_max = vm_page_speculative_count;
2232 vm_page_speculative_count_drifts++;
2233 #if DEVELOPMENT || DEBUG
2234 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2235 #endif /* DEVELOPMENT || DEBUG */
2236 /* readjust... */
2237 vm_page_speculative_count = 0;
2238 /* ... and continue */
2239 continue;
2240 }
2241
2242 if (vm_page_speculative_count > vm_page_speculative_target || force_speculative_aging == TRUE)
2243 can_steal = TRUE;
2244 else {
2245 if (!delay_speculative_age) {
2246 mach_timespec_t ts_fully_aged;
2247
2248 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
2249 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
2250 * 1000 * NSEC_PER_USEC;
2251
2252 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2253
2254 clock_sec_t sec;
2255 clock_nsec_t nsec;
2256 clock_get_system_nanotime(&sec, &nsec);
2257 ts.tv_sec = (unsigned int) sec;
2258 ts.tv_nsec = nsec;
2259
2260 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2261 can_steal = TRUE;
2262 else
2263 delay_speculative_age++;
2264 } else {
2265 delay_speculative_age++;
2266 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2267 delay_speculative_age = 0;
2268 }
2269 }
2270 if (can_steal == TRUE)
2271 vm_page_speculate_ageit(aq);
2272 }
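		/*
		 * The aging math above, with illustrative (not authoritative)
		 * values: if vm_page_speculative_q_age_ms were 500 and
		 * VM_PAGE_MAX_SPECULATIVE_AGE_Q were 10, ts_fully_aged works
		 * out to aq->age_ts + 5 seconds, so a non-empty aging bin is
		 * only stolen from once it has sat for the full aging interval
		 * (or once vm_page_speculative_count exceeds its target, or a
		 * caller forced aging via force_speculative_aging).
		 */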
2273 force_speculative_aging = FALSE;
2274
2275 #if CONFIG_BACKGROUND_QUEUE
2276 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0 &&
2277 ((vm_page_background_mode == VM_PAGE_BG_DISABLED) || (vm_page_background_count <= vm_page_background_target)))
2278 #else
2279 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0)
2280 #endif
2281 {
2282 int pages_evicted;
2283
2284 if (object != NULL) {
2285 vm_object_unlock(object);
2286 object = NULL;
2287 }
2288 pages_evicted = vm_object_cache_evict(100, 10);
2289
2290 if (pages_evicted) {
2291
2292 vm_pageout_cache_evicted += pages_evicted;
2293
2294 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2295 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
2296 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2297
2298 /*
2299 * we just freed up to 100 pages,
2300 * so go back to the top of the main loop
2301 * and re-evaluate the memory situation
2302 */
2303 continue;
2304 } else
2305 cache_evict_throttle = 1000;
2306 }
2307 if (cache_evict_throttle)
2308 cache_evict_throttle--;
2309
2310 #if CONFIG_JETSAM
2311 /*
2312 * don't let the filecache_min fall below 15% of available memory
2313 * on systems with an active compressor that isn't nearing its
2314 * limits w/r to accepting new data
2315 *
2316 * on systems w/o the compressor/swapper, the filecache is always
2317 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2318 * since most (if not all) of the anonymous pages are in the
2319 * throttled queue (which isn't counted as available) which
2320 * effectively disables this filter
2321 */
2322 if (vm_compressor_low_on_space())
2323 vm_page_filecache_min = 0;
2324 else
2325 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
2326 #else
2327 if (vm_compressor_out_of_space())
2328 vm_page_filecache_min = 0;
2329 else {
2330 /*
2331 * don't let the filecache_min fall below 33% of available memory...
2332 */
2333 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
2334 }
2335 #endif
2336 if (vm_page_free_count < (vm_page_free_reserved / 4))
2337 vm_page_filecache_min = 0;
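		/*
		 * Net effect of the computation above: with jetsam the file
		 * cache floor is 1/7 (~14%) of AVAILABLE_NON_COMPRESSED_MEMORY,
		 * without jetsam it is 1/3 (~33%), and in either configuration
		 * the floor drops to 0 when the compressor is low on space
		 * (with jetsam) or out of space (without), or when the free
		 * list falls below a quarter of vm_page_free_reserved.
		 */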
2338
2339 exceeded_burst_throttle = FALSE;
2340 /*
2341 * Sometimes we have to pause:
2342 * 1) No inactive pages - nothing to do.
2343 * 2) Loop control - no acceptable pages found on the inactive queue
2344 * within the last vm_pageout_burst_inactive_throttle iterations
2345 * 3) Flow control - default pageout queue is full
2346 */
2347 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2348 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2349 vm_page_queue_empty(&sq->age_q)) {
2350 vm_pageout_scan_empty_throttle++;
2351 msecs = vm_pageout_empty_wait;
2352 goto vm_pageout_scan_delay;
2353
2354 } else if (inactive_burst_count >=
2355 MIN(vm_pageout_burst_inactive_throttle,
2356 (vm_page_inactive_count +
2357 vm_page_speculative_count))) {
2358 vm_pageout_scan_burst_throttle++;
2359 msecs = vm_pageout_burst_wait;
2360
2361 exceeded_burst_throttle = TRUE;
2362 goto vm_pageout_scan_delay;
2363
2364 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
2365 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
2366 vm_pageout_scan_swap_throttle++;
2367 msecs = vm_pageout_swap_wait;
2368 goto vm_pageout_scan_delay;
2369
2370 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2371 VM_DYNAMIC_PAGING_ENABLED()) {
2372 clock_sec_t sec;
2373 clock_nsec_t nsec;
2374
2375 switch (flow_control.state) {
2376
2377 case FCS_IDLE:
2378 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
2379
2380 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2381 VM_PAGEOUT_PB_THREAD_YIELD);
2382 if (!VM_PAGE_Q_THROTTLED(iq)) {
2383 vm_pageout_scan_yield_unthrottled++;
2384 continue;
2385 }
2386 if (vm_page_pageable_external_count > vm_page_filecache_min &&
2387 !vm_page_queue_empty(&vm_page_queue_inactive)) {
2388 anons_grabbed = ANONS_GRABBED_LIMIT;
2389 vm_pageout_scan_throttle_deferred++;
2390 goto consider_inactive;
2391 }
2392 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
2393 continue;
2394 }
2395 reset_deadlock_timer:
2396 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2397 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2398 clock_get_system_nanotime(&sec, &nsec);
2399 flow_control.ts.tv_sec = (unsigned int) sec;
2400 flow_control.ts.tv_nsec = nsec;
2401 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2402
2403 flow_control.state = FCS_DELAYED;
2404 msecs = vm_pageout_deadlock_wait;
2405
2406 break;
2407
2408 case FCS_DELAYED:
2409 clock_get_system_nanotime(&sec, &nsec);
2410 ts.tv_sec = (unsigned int) sec;
2411 ts.tv_nsec = nsec;
2412
2413 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2414 /*
2415 * the pageout thread for the default pager is potentially
2416 * deadlocked since the
2417 * default pager queue has been throttled for more than the
2418 * allowable time... we need to move some clean pages or dirty
2419 * pages belonging to the external pagers if they aren't throttled
2420 * vm_page_free_wanted represents the number of threads currently
2421 * blocked waiting for pages... we'll move one page for each of
2422 * these plus a fixed amount to break the logjam... once we're done
2423 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2424 * with a new timeout target since we have no way of knowing
2425 * whether we've broken the deadlock except through observation
2426 * of the queue associated with the default pager... we need to
2427 * stop moving pages and allow the system to run to see what
2428 * state it settles into.
2429 */
2430 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
2431 vm_pageout_scan_deadlock_detected++;
2432 flow_control.state = FCS_DEADLOCK_DETECTED;
2433 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2434 goto consider_inactive;
2435 }
2436 /*
2437 * just resniff instead of trying
2438 * to compute a new delay time... we're going to be
2439 * awakened immediately upon a laundry completion,
2440 * so we won't wait any longer than necessary
2441 */
2442 msecs = vm_pageout_idle_wait;
2443 break;
2444
2445 case FCS_DEADLOCK_DETECTED:
2446 if (vm_pageout_deadlock_target)
2447 goto consider_inactive;
2448 goto reset_deadlock_timer;
2449
2450 }
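		/*
		 * Flow-control recap: FCS_IDLE arms a deadlock timer of
		 * vm_pageout_deadlock_wait milliseconds and moves to
		 * FCS_DELAYED when the internal queue remains throttled.
		 * FCS_DELAYED promotes to FCS_DEADLOCK_DETECTED when that
		 * timer expires while the queue is still throttled, setting a
		 * relief target of vm_pageout_deadlock_relief plus the current
		 * page waiters and waking vm_pageout_garbage_collect.  Once
		 * the relief target has been consumed, FCS_DEADLOCK_DETECTED
		 * re-arms the timer via reset_deadlock_timer.  If the internal
		 * queue unthrottles at any point, the check just below drops
		 * the state back to FCS_IDLE.
		 */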
2451 vm_pageout_scan_delay:
2452 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2453 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2454
2455 if (flow_control.state == FCS_DELAYED &&
2456 !VM_PAGE_Q_THROTTLED(iq)) {
2457 flow_control.state = FCS_IDLE;
2458 goto consider_inactive;
2459 }
2460
2461 if (vm_page_free_count >= vm_page_free_target) {
2462 /*
2463 * we're here because
2464 * someone else freed up some pages while we had
2465 * the queues unlocked above,
2466 * and we've hit one of the 3 conditions that
2467 * cause us to pause the pageout scan thread
2468 *
2469 * since we already have enough free pages,
2470 * let's avoid stalling and return normally
2471 *
2472 * before we return, make sure the pageout I/O threads
2473 * are running throttled in case there are still requests
2474 * in the laundry... since we have enough free pages
2475 * we don't need the laundry to be cleaned in a timely
2476 * fashion... so let's avoid interfering with foreground
2477 * activity
2478 *
2479 * we don't want to hold vm_page_queue_free_lock when
2480 * calling vm_pageout_adjust_eq_iothrottle (since it
2481 * may cause other locks to be taken), so we do the initial
2482 * check outside of the lock. Once we take the lock,
2483 * we recheck the condition since it may have changed.
2484 * if it has, no problem, we will make the threads
2485 * non-throttled before actually blocking
2486 */
2487 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2488 }
2489 lck_mtx_lock(&vm_page_queue_free_lock);
2490
2491 if (vm_page_free_count >= vm_page_free_target &&
2492 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2493 goto return_from_scan;
2494 }
2495 lck_mtx_unlock(&vm_page_queue_free_lock);
2496
2497 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2498 /*
2499 * we're most likely about to block due to one of
2500 * the 3 conditions that cause vm_pageout_scan to
2501 * not be able to make forward progress w/r
2502 * to providing new pages to the free queue,
2503 * so unthrottle the I/O threads in case we
2504 * have laundry to be cleaned... it needs
2505 * to be completed ASAP.
2506 *
2507 * even if we don't block, we want the io threads
2508 * running unthrottled since the sum of free +
2509 * clean pages is still under our free target
2510 */
2511 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2512 }
2513 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2514 /*
2515 * if we get here we're below our free target and
2516 * we're stalling due to a full laundry queue or
2517 * we don't have any inactive pages other than
2518 * those in the clean queue...
2519 * however, we have pages on the clean queue that
2520 * can be moved to the free queue, so let's not
2521 * stall the pageout scan
2522 */
2523 flow_control.state = FCS_IDLE;
2524 goto consider_inactive;
2525 }
2526 VM_CHECK_MEMORYSTATUS;
2527
2528 if (flow_control.state != FCS_IDLE)
2529 vm_pageout_scan_throttle++;
2530 iq->pgo_throttled = TRUE;
2531
2532 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2533 counter(c_vm_pageout_scan_block++);
2534
2535 vm_page_unlock_queues();
2536
2537 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2538
2539 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2540 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2541 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2542
2543 thread_block(THREAD_CONTINUE_NULL);
2544
2545 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2546 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2547 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2548
2549 vm_page_lock_queues();
2550
2551 iq->pgo_throttled = FALSE;
2552
2553 if (loop_count >= vm_page_inactive_count)
2554 loop_count = 0;
2555 inactive_burst_count = 0;
2556
2557 goto Restart;
2558 /*NOTREACHED*/
2559 }
2560
2561
2562 flow_control.state = FCS_IDLE;
2563 consider_inactive:
2564 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2565 vm_pageout_inactive_external_forced_reactivate_limit);
2566 loop_count++;
2567 inactive_burst_count++;
2568 vm_pageout_inactive++;
2569
2570
2571 /*
2572 * Choose a victim.
2573 */
2574 while (1) {
2575 uint32_t inactive_external_count;
2576
2577 #if CONFIG_BACKGROUND_QUEUE
2578 page_from_bg_q = FALSE;
2579 #endif /* CONFIG_BACKGROUND_QUEUE */
2580
2581 m = NULL;
2582 m_object = VM_OBJECT_NULL;
2583
2584 if (VM_DYNAMIC_PAGING_ENABLED()) {
2585 assert(vm_page_throttled_count == 0);
2586 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2587 }
2588
2589 /*
2590 * Try for a clean-queue inactive page.
2591 * These are pages that vm_pageout_scan tried to steal earlier, but
2592 * were dirty and had to be cleaned. Pick them up now that they are clean.
2593 */
2594 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2595 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2596
2597 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2598
2599 break;
2600 }
2601
2602 /*
2603 * The next most eligible pages are ones we paged in speculatively,
2604 * but which have not yet been touched and have been aged out.
2605 */
2606 if (!vm_page_queue_empty(&sq->age_q)) {
2607 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2608
2609 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2610
2611 if (!m->dirty || force_anonymous == FALSE)
2612 break;
2613 else
2614 m = NULL;
2615 }
2616
2617 #if CONFIG_BACKGROUND_QUEUE
2618 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2619 vm_object_t bg_m_object = NULL;
2620
2621 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2622
2623 bg_m_object = VM_PAGE_OBJECT(m);
2624
2625 if (!VM_PAGE_PAGEABLE(m)) {
2626 /*
2627 * This page is on the background queue
2628 * but not on a pageable queue. This is
2629 * likely a transient state and whoever
2630 * took it out of its pageable queue
2631 * will likely put it back on a pageable
2632 * queue soon but we can't deal with it
2633 * at this point, so let's ignore this
2634 * page.
2635 */
2636 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2637
2638 if (bg_m_object->internal &&
2639 ((vm_compressor_out_of_space() == TRUE) ||
2640 (vm_page_free_count < (vm_page_free_reserved / 4)))) {
2641
2642 vm_pageout_skipped_bq_internal++;
2643 } else {
2644 page_from_bg_q = TRUE;
2645
2646 if (bg_m_object->internal)
2647 vm_pageout_considered_bq_internal++;
2648 else
2649 vm_pageout_considered_bq_external++;
2650
2651 break;
2652 }
2653 }
2654 }
2655 #endif
2656
2657 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2658 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2659
2660 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2661 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
2662 grab_anonymous = TRUE;
2663 anons_grabbed = 0;
2664
2665 vm_pageout_skipped_external++;
2666 goto want_anonymous;
2667 }
2668 #if CONFIG_JETSAM
2669 /* If the file-backed pool has accumulated
2670 * significantly more pages than the jetsam
2671 * threshold, prefer to reclaim those
2672 * inline to minimise compute overhead of reclaiming
2673 * anonymous pages.
2674 * This calculation does not account for the CPU local
2675 * external page queues, as those are expected to be
2676 * much smaller relative to the global pools.
2677 */
2678 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2679 if (vm_page_pageable_external_count >
2680 vm_page_filecache_min) {
2681 if ((vm_page_pageable_external_count *
2682 vm_pageout_memorystatus_fb_factor_dr) >
2683 (memorystatus_available_pages_critical *
2684 vm_pageout_memorystatus_fb_factor_nr)) {
2685 grab_anonymous = FALSE;
2686 #if DEVELOPMENT || DEBUG
2687 vm_grab_anon_overrides++;
2688 #endif
2689 }
2690 }
2691 #if DEVELOPMENT || DEBUG
2692 if (grab_anonymous) {
2693 vm_grab_anon_nops++;
2694 }
2695 #endif
2696 }
2697 #endif /* CONFIG_JETSAM */
2698
2699 want_anonymous:
2700 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2701
2702 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2703 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2704
2705 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2706 anons_grabbed = 0;
2707
2708 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2709 if ((++reactivated_this_call % 100))
2710 goto must_activate_page;
2711 /*
2712 * steal 1% of the file backed pages even if
2713 * we are under the limit that has been set
2714 * for a healthy filecache
2715 */
2716 }
2717 break;
2718 }
2719 }
2720 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2721 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2722
2723 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2724 anons_grabbed++;
2725
2726 break;
2727 }
2728
2729 /*
2730 * if we've gotten here, we have no victim page.
2731 * check to see if we've not finished balancing the queues
2732 * or we have a page on the aged speculative queue that we
2733 * skipped due to force_anonymous == TRUE... or we have
2734 * speculative pages that we can prematurely age... in
2735 * one of these cases we'll keep going, else panic
2736 */
2737 force_anonymous = FALSE;
2738 vm_pageout_no_victim++;
2739
2740 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2741 goto done_with_inactivepage;
2742
2743 if (!vm_page_queue_empty(&sq->age_q))
2744 goto done_with_inactivepage;
2745
2746 if (vm_page_speculative_count) {
2747 force_speculative_aging = TRUE;
2748 goto done_with_inactivepage;
2749 }
2750 panic("vm_pageout: no victim");
2751
2752 /* NOTREACHED */
2753 }
2754 assert(VM_PAGE_PAGEABLE(m));
2755 m_object = VM_PAGE_OBJECT(m);
2756 force_anonymous = FALSE;
2757
2758 page_prev_q_state = m->vm_page_q_state;
2759 /*
2760 * we just found this page on one of our queues...
2761 * it can't also be on the pageout queue, so safe
2762 * to call vm_page_queues_remove
2763 */
2764 vm_page_queues_remove(m, TRUE);
2765
2766 assert(!m->laundry);
2767 assert(!m->private);
2768 assert(!m->fictitious);
2769 assert(m_object != kernel_object);
2770 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2771
2772 vm_pageout_stats[vm_pageout_stat_now].considered++;
2773 vm_pageout_considered_page++;
2774
2775 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2776
2777 /*
2778 * check to see if we currently are working
2779 * with the same object... if so, we've
2780 * already got the lock
2781 */
2782 if (m_object != object) {
2783 /*
2784 * the object associated with candidate page is
2785 * different from the one we were just working
2786 * with... dump the lock if we still own it
2787 */
2788 if (object != NULL) {
2789 vm_object_unlock(object);
2790 object = NULL;
2791 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2792 }
2793 /*
2794 * Try to lock object; since we've already got the
2795 * page queues lock, we can only 'try' for this one.
2796 * if the 'try' fails, we need to do a mutex_pause
2797 * to allow the owner of the object lock a chance to
2798 * run... otherwise, we're likely to trip over this
2799 * object in the same state as we work our way through
2800 * the queue... clumps of pages associated with the same
2801 * object are fairly typical on the inactive and active queues
2802 */
2803 if (!vm_object_lock_try_scan(m_object)) {
2804 vm_page_t m_want = NULL;
2805
2806 vm_pageout_inactive_nolock++;
2807
2808 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2809 vm_pageout_cleaned_nolock++;
2810
2811 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2812 m->reference = FALSE;
2813
2814 #if !CONFIG_EMBEDDED
2815 /*
2816 * m->object must be stable since we hold the page queues lock...
2817 * we can update the scan_collisions field sans the object lock
2818 * since it is a separate field and this is the only spot that does
2819 * a read-modify-write operation and it is never executed concurrently...
2820 * we can asynchronously set this field to 0 when creating a UPL, so it
2821 * is possible for the value to be a bit non-deterministic, but that's ok
2822 * since it's only used as a hint
2823 */
2824
2825 /*
2826 * This is not used on EMBEDDED because having this variable set *could* lead
2827 * us to self-cannibalize pages from m_object to fill a UPL for a pagein.
2828 * And, there's a high probability that the object that vm_pageout_scan
2829 * wants and collides on is a very popular object e.g. the shared cache on EMBEDDED.
2830 * The older pages that we cannibalize from the shared cache could be really
2831 * important text pages e.g. the system call stubs.
2832 */
2833 m_object->scan_collisions = 1;
2834 #endif /* !CONFIG_EMBEDDED */
2835
2836 if ( !vm_page_queue_empty(&sq->age_q) )
2837 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2838 else if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2839 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2840 else if ( !vm_page_queue_empty(&vm_page_queue_inactive) &&
2841 (anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)))
2842 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2843 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2844 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2845
2846 /*
2847 * this is the next object we're going to be interested in
2848 * try to make sure it's available after the mutex_yield
2849 * returns control
2850 */
2851 if (m_want)
2852 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2853
2854 /*
2855 * force us to dump any collected free pages
2856 * and to pause before moving on
2857 */
2858 try_failed = TRUE;
2859
2860 goto requeue_page;
2861 }
2862 object = m_object;
2863 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2864
2865 try_failed = FALSE;
2866 }
2867 assert(m_object == object);
2868 assert(VM_PAGE_OBJECT(m) == m_object);
2869
2870 if (m->busy) {
2871 /*
2872 * Somebody is already playing with this page.
2873 * Put it back on the appropriate queue
2874 *
2875 */
2876 vm_pageout_inactive_busy++;
2877
2878 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2879 vm_pageout_cleaned_busy++;
2880 requeue_page:
2881 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2882 vm_page_enqueue_inactive(m, FALSE);
2883 else
2884 vm_page_activate(m);
2885 #if CONFIG_BACKGROUND_QUEUE
2886 if (page_from_bg_q == TRUE) {
2887 if (m_object->internal)
2888 vm_pageout_rejected_bq_internal++;
2889 else
2890 vm_pageout_rejected_bq_external++;
2891 }
2892 #endif
2893 goto done_with_inactivepage;
2894 }
2895
2896
2897 /*
2898 * If it's absent, in error or the object is no longer alive,
2899 * we can reclaim the page... in the no longer alive case,
2900 * there are 2 states the page can be in that preclude us
2901 * from reclaiming it - busy or cleaning - that we've already
2902 * dealt with
2903 */
2904 if (m->absent || m->error || !object->alive) {
2905
2906 if (m->absent)
2907 vm_pageout_inactive_absent++;
2908 else if (!object->alive)
2909 vm_pageout_inactive_notalive++;
2910 else
2911 vm_pageout_inactive_error++;
2912 reclaim_page:
2913 if (vm_pageout_deadlock_target) {
2914 vm_pageout_scan_inactive_throttle_success++;
2915 vm_pageout_deadlock_target--;
2916 }
2917
2918 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2919
2920 if (object->internal) {
2921 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2922 } else {
2923 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2924 }
2925 assert(!m->cleaning);
2926 assert(!m->laundry);
2927
2928 m->busy = TRUE;
2929
2930 /*
2931 * remove page from object here since we're already
2932 * behind the object lock... defer the rest of the work
2933 * we'd normally do in vm_page_free_prepare_object
2934 * until 'vm_page_free_list' is called
2935 */
2936 if (m->tabled)
2937 vm_page_remove(m, TRUE);
2938
2939 assert(m->pageq.next == 0 && m->pageq.prev == 0);
2940 m->snext = local_freeq;
2941 local_freeq = m;
2942 local_freed++;
2943
2944 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2945 vm_pageout_freed_from_speculative++;
2946 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2947 vm_pageout_freed_from_cleaned++;
2948 else
2949 vm_pageout_freed_from_inactive_clean++;
2950
2951 vm_pageout_stats[vm_pageout_stat_now].reclaimed_clean++;
2952
2953 inactive_burst_count = 0;
2954 goto done_with_inactivepage;
2955 }
2956 /*
2957 * If the object is empty, the page must be reclaimed even
2958 * if dirty or used.
2959 * If the page belongs to a volatile object, we stick it back
2960 * on.
2961 */
2962 if (object->copy == VM_OBJECT_NULL) {
2963 if (object->purgable == VM_PURGABLE_EMPTY) {
2964 if (m->pmapped == TRUE) {
2965 /* unmap the page */
2966 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2967 if (refmod_state & VM_MEM_MODIFIED) {
2968 SET_PAGE_DIRTY(m, FALSE);
2969 }
2970 }
2971 if (m->dirty || m->precious) {
2972 /* we saved the cost of cleaning this page ! */
2973 vm_page_purged_count++;
2974 }
2975 goto reclaim_page;
2976 }
2977
2978 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2979 /*
2980 * With the VM compressor, the cost of
2981 * reclaiming a page is much lower (no I/O),
2982 * so if we find a "volatile" page, it's better
2983 * to let it get compressed rather than letting
2984 * it occupy a full page until it gets purged.
2985 * So no need to check for "volatile" here.
2986 */
2987 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2988 /*
2989 * Avoid cleaning a "volatile" page which might
2990 * be purged soon.
2991 */
2992
2993 /* if it's wired, we can't put it on our queue */
2994 assert(!VM_PAGE_WIRED(m));
2995
2996 /* just stick it back on! */
2997 reactivated_this_call++;
2998
2999 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3000 vm_pageout_cleaned_volatile_reactivated++;
3001
3002 goto reactivate_page;
3003 }
3004 }
3005 /*
3006 * If it's being used, reactivate.
3007 * (Fictitious pages are either busy or absent.)
3008 * First, update the reference and dirty bits
3009 * to make sure the page is unreferenced.
3010 */
3011 refmod_state = -1;
3012
3013 if (m->reference == FALSE && m->pmapped == TRUE) {
3014 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3015
3016 if (refmod_state & VM_MEM_REFERENCED)
3017 m->reference = TRUE;
3018 if (refmod_state & VM_MEM_MODIFIED) {
3019 SET_PAGE_DIRTY(m, FALSE);
3020 }
3021 }
3022
3023 /*
3024 * if (m->cleaning && !m->free_when_done)
3025 * If already cleaning this page in place and it hasn't
3026 * been recently referenced, just pull off the queue.
3027 * We can leave the page mapped, and upl_commit_range
3028 * will put it on the clean queue.
3029 *
3030 * if (m->free_when_done && !m->cleaning)
3031 * an msync INVALIDATE is in progress...
3032 * this page has been marked for destruction
3033 * after it has been cleaned,
3034 * but not yet gathered into a UPL
3035 * where 'cleaning' will be set...
3036 * just leave it off the paging queues
3037 *
3038 * if (m->free_when_done && m->cleaning)
3039 * an msync INVALIDATE is in progress
3040 * and the UPL has already gathered this page...
3041 * just leave it off the paging queues
3042 */
3043
3044 /*
3045 * page with m->free_when_done and still on the queues means that an
3046 * MS_INVALIDATE is in progress on this page... leave it alone
3047 */
3048 if (m->free_when_done) {
3049 goto done_with_inactivepage;
3050 }
3051
3052 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
3053 if (m->cleaning) {
3054 if (m->reference == TRUE) {
3055 reactivated_this_call++;
3056 goto reactivate_page;
3057 } else {
3058 goto done_with_inactivepage;
3059 }
3060 }
3061
3062 if (m->reference || m->dirty) {
3063 /* deal with a rogue "reusable" page */
3064 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3065 }
3066
3067 if (!m->no_cache &&
3068 #if CONFIG_BACKGROUND_QUEUE
3069 page_from_bg_q == FALSE &&
3070 #endif
3071 (m->reference ||
3072 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
3073 /*
3074 * The page we pulled off the inactive list has
3075 * been referenced. It is possible for other
3076 * processors to be touching pages faster than we
3077 * can clear the referenced bit and traverse the
3078 * inactive queue, so we limit the number of
3079 * reactivations.
3080 */
3081 if (++reactivated_this_call >= reactivate_limit) {
3082 vm_pageout_reactivation_limit_exceeded++;
3083 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3084 vm_pageout_inactive_force_reclaim++;
3085 } else {
3086 uint32_t isinuse;
3087
3088 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3089 vm_pageout_cleaned_reference_reactivated++;
3090 reactivate_page:
3091 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
3092 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3093 /*
3094 * no explicit mappings of this object exist
3095 * and it's not open via the filesystem
3096 */
3097 vm_page_deactivate(m);
3098 vm_pageout_inactive_deactivated++;
3099 } else {
3100 must_activate_page:
3101 /*
3102 * The page was/is being used, so put back on active list.
3103 */
3104 vm_page_activate(m);
3105 VM_STAT_INCR(reactivations);
3106 inactive_burst_count = 0;
3107 }
3108 #if CONFIG_BACKGROUND_QUEUE
3109 if (page_from_bg_q == TRUE) {
3110 if (m_object->internal)
3111 vm_pageout_rejected_bq_internal++;
3112 else
3113 vm_pageout_rejected_bq_external++;
3114 }
3115 #endif
3116 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3117 vm_pageout_cleaned_reactivated++;
3118 vm_pageout_inactive_used++;
3119
3120 goto done_with_inactivepage;
3121 }
3122 /*
3123 * Make sure we call pmap_get_refmod() if it
3124 * wasn't already called just above, to update
3125 * the dirty bit.
3126 */
3127 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
3128 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3129 if (refmod_state & VM_MEM_MODIFIED) {
3130 SET_PAGE_DIRTY(m, FALSE);
3131 }
3132 }
3133 }
3134
3135 XPR(XPR_VM_PAGEOUT,
3136 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
3137 object, m->offset, m, 0,0);
3138
3139 /*
3140 * we've got a candidate page to steal...
3141 *
3142 * m->dirty is up to date courtesy of the
3143 * preceding check for m->reference... if
3144 * we get here, then m->reference had to be
3145 * FALSE (or possibly "reactivate_limit" was
3146 * exceeded), but in either case we called
3147 * pmap_get_refmod() and updated both
3148 * m->reference and m->dirty
3149 *
3150 * if it's dirty or precious we need to
3151 * see if the target queue is throttled...
3152 * if it is, we need to skip over it by moving it back
3153 * to the end of the inactive queue
3154 */
3155
3156 inactive_throttled = FALSE;
3157
3158 if (m->dirty || m->precious) {
3159 if (object->internal) {
3160 if (VM_PAGE_Q_THROTTLED(iq))
3161 inactive_throttled = TRUE;
3162 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3163 inactive_throttled = TRUE;
3164 }
3165 }
3166 throttle_inactive:
3167 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3168 object->internal && m->dirty &&
3169 (object->purgable == VM_PURGABLE_DENY ||
3170 object->purgable == VM_PURGABLE_NONVOLATILE ||
3171 object->purgable == VM_PURGABLE_VOLATILE)) {
3172 vm_page_check_pageable_safe(m);
3173 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3174 vm_page_queue_enter(&vm_page_queue_throttled, m,
3175 vm_page_t, pageq);
3176 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
3177 vm_page_throttled_count++;
3178
3179 vm_pageout_scan_reclaimed_throttled++;
3180
3181 inactive_burst_count = 0;
3182 goto done_with_inactivepage;
3183 }
3184 if (inactive_throttled == TRUE) {
3185
3186 if (object->internal == FALSE) {
3187 /*
3188 * we need to break up the following potential deadlock case...
3189 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3190 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3191 * c) Most of the pages in the inactive queue belong to this file.
3192 *
3193 * we are potentially in this deadlock because...
3194 * a) the external pageout queue is throttled
3195 * b) we're done with the active queue and moved on to the inactive queue
3196 * c) we've got a dirty external page
3197 *
3198 * since we don't know the reason for the external pageout queue being throttled we
3199 * must suspect that we are deadlocked, so move the current page onto the active queue
3200 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3201 *
3202 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3203 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3204 * pool the next time we select a victim page... if we can make enough new free pages,
3205 * the deadlock will break, the external pageout queue will empty and it will no longer
3206 * be throttled
3207 *
3208 * if we have jetsam configured, keep a count of the pages reactivated this way so
3209 * that we can try to find clean pages in the active/inactive queues before
3210 * deciding to jetsam a process
3211 */
3212 vm_pageout_scan_inactive_throttled_external++;
3213
3214 vm_page_check_pageable_safe(m);
3215 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3216 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3217 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
3218 vm_page_active_count++;
3219 vm_page_pageable_external_count++;
3220
3221 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3222
3223 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3224 vm_pageout_inactive_external_forced_reactivate_limit--;
3225
3226 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3227 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3228 /*
3229 * Possible deadlock scenario so request jetsam action
3230 */
3231 assert(object);
3232 vm_object_unlock(object);
3233 object = VM_OBJECT_NULL;
3234 vm_page_unlock_queues();
3235
3236 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3237 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3238
3239 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3240 if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3241 vm_pageout_inactive_external_forced_jetsam_count++;
3242 }
3243
3244 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3245 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3246
3247 vm_page_lock_queues();
3248 delayed_unlock = 1;
3249 }
3250 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3251 force_anonymous = TRUE;
3252 #endif
3253 inactive_burst_count = 0;
3254 goto done_with_inactivepage;
3255 } else {
3256 vm_pageout_scan_inactive_throttled_internal++;
3257 goto must_activate_page;
3258 }
3259 }
3260
3261 /*
3262 * we've got a page that we can steal...
3263 * eliminate all mappings and make sure
3264 * we have the up-to-date modified state
3265 *
3266 * if we need to do a pmap_disconnect then we
3267 * need to re-evaluate m->dirty since the pmap_disconnect
3268 * provides the true state atomically... the
3269 * page was still mapped up to the pmap_disconnect
3270 * and may have been dirtied at the last microsecond
3271 *
3272 * Note that if 'pmapped' is FALSE then the page is not
3273 * and has not been in any map, so there is no point calling
3274 * pmap_disconnect(). m->dirty could have been set in anticipation
3275 * of likely usage of the page.
3276 */
3277 if (m->pmapped == TRUE) {
3278 int pmap_options;
3279
3280 /*
3281 * Don't count this page as going into the compressor
3282 * if any of these are true:
3283 * 1) compressed pager isn't enabled
3284 * 2) Freezer enabled device with compressed pager
3285 * backend (exclusive use) i.e. most of the VM system
3286 * (including vm_pageout_scan) has no knowledge of
3287 * the compressor
3288 * 3) This page belongs to a file and hence will not be
3289 * sent into the compressor
3290 */
3291 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3292 object->internal == FALSE) {
3293 pmap_options = 0;
3294 } else if (m->dirty || m->precious) {
3295 /*
3296 * VM knows that this page is dirty (or
3297 * precious) and needs to be compressed
3298 * rather than freed.
3299 * Tell the pmap layer to count this page
3300 * as "compressed".
3301 */
3302 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3303 } else {
3304 /*
3305 * VM does not know if the page needs to
3306 * be preserved but the pmap layer might tell
3307 * us if any mapping has "modified" it.
3308 * Let the pmap layer count this page
3309 * as compressed if and only if it has been
3310 * modified.
3311 */
3312 pmap_options =
3313 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3314 }
3315 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3316 pmap_options,
3317 NULL);
3318 if (refmod_state & VM_MEM_MODIFIED) {
3319 SET_PAGE_DIRTY(m, FALSE);
3320 }
3321 }
3322 /*
3323 * reset our count of pages that have been reclaimed
3324 * since the last page was 'stolen'
3325 */
3326 inactive_reclaim_run = 0;
3327
3328 /*
3329 * If it's clean and not precious, we can free the page.
3330 */
3331 if (!m->dirty && !m->precious) {
3332
3333 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3334 vm_pageout_speculative_clean++;
3335 else {
3336 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3337 vm_pageout_inactive_anonymous++;
3338 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3339 vm_pageout_cleaned_reclaimed++;
3340
3341 vm_pageout_inactive_clean++;
3342 }
3343 /*
3344 * OK, at this point we have found a page we are going to free.
3345 */
3346 #if CONFIG_PHANTOM_CACHE
3347 if (!object->internal)
3348 vm_phantom_cache_add_ghost(m);
3349 #endif
3350 goto reclaim_page;
3351 }
3352
3353 /*
3354 * The page may have been dirtied since the last check
3355 * for a throttled target queue (which may have been skipped
3356 * if the page was clean then). With the dirty page
3357 * disconnected here, we can make one final check.
3358 */
3359 if (object->internal) {
3360 if (VM_PAGE_Q_THROTTLED(iq))
3361 inactive_throttled = TRUE;
3362 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3363 inactive_throttled = TRUE;
3364 }
3365
3366 if (inactive_throttled == TRUE)
3367 goto throttle_inactive;
3368
3369 #if VM_PRESSURE_EVENTS
3370 #if CONFIG_JETSAM
3371
3372 /*
3373 * If Jetsam is enabled, then the sending
3374 * of memory pressure notifications is handled
3375 * from the same thread that takes care of high-water
3376 * and other jetsams i.e. the memorystatus_thread.
3377 */
3378
3379 #else /* CONFIG_JETSAM */
3380
3381 vm_pressure_response();
3382
3383 #endif /* CONFIG_JETSAM */
3384 #endif /* VM_PRESSURE_EVENTS */
3385
3386 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3387 vm_pageout_speculative_dirty++;
3388 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3389 vm_pageout_inactive_anonymous++;
3390
3391 if (object->internal)
3392 vm_pageout_inactive_dirty_internal++;
3393 else
3394 vm_pageout_inactive_dirty_external++;
3395
3396 /*
3397 * do NOT set the pageout bit!
3398 * sure, we might need free pages, but this page is going to take time to become free
3399 * anyway, so we may as well put it on the clean queue first and take it from there later
3400 * if necessary. that way, we'll ensure we don't free up too much. -mj
3401 */
3402 vm_pageout_cluster(m);
3403
3404 done_with_inactivepage:
3405
3406 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
3407
3408 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3409 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3410 if (try_failed == TRUE)
3411 lck_mtx_yield(&vm_page_queue_lock);
3412 }
3413
3414 /*
3415 * back to top of pageout scan loop
3416 */
3417 }
3418 }
3419
3420
3421 int vm_page_free_count_init;
3422
3423 void
3424 vm_page_free_reserve(
3425 int pages)
3426 {
3427 int free_after_reserve;
3428
3429 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3430
3431 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3432 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3433 else
3434 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3435
3436 } else {
3437 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3438 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3439 else
3440 vm_page_free_reserved += pages;
3441 }
3442 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3443
3444 vm_page_free_min = vm_page_free_reserved +
3445 VM_PAGE_FREE_MIN(free_after_reserve);
3446
3447 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3448 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3449
3450 vm_page_free_target = vm_page_free_reserved +
3451 VM_PAGE_FREE_TARGET(free_after_reserve);
3452
3453 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3454 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3455
3456 if (vm_page_free_target < vm_page_free_min + 5)
3457 vm_page_free_target = vm_page_free_min + 5;
3458
3459 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3460 }
3461
3462 /*
3463 * vm_pageout is the high level pageout daemon.
3464 */
3465
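/*
 * vm_pageout_continue:
 *
 * Continuation for the pageout daemon: run vm_pageout_scan(),
 * wake any vm_pageout_wait() waiters, then block on
 * vm_page_free_wanted with this routine as the continuation,
 * so each wakeup starts a fresh scan.
 */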
3466 void
3467 vm_pageout_continue(void)
3468 {
3469 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3470 vm_pageout_scan_event_counter++;
3471
3472 #if !CONFIG_EMBEDDED
3473 lck_mtx_lock(&vm_page_queue_free_lock);
3474 vm_pageout_running = TRUE;
3475 lck_mtx_unlock(&vm_page_queue_free_lock);
3476 #endif /* CONFIG_EMBEDDED */
3477
3478 vm_pageout_scan();
3479 /*
3480 * we hold both the vm_page_queue_free_lock
3481 * and the vm_page_queues_lock at this point
3482 */
3483 assert(vm_page_free_wanted == 0);
3484 assert(vm_page_free_wanted_privileged == 0);
3485 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3486
3487 #if !CONFIG_EMBEDDED
3488 vm_pageout_running = FALSE;
3489 if (vm_pageout_waiter) {
3490 vm_pageout_waiter = FALSE;
3491 thread_wakeup((event_t)&vm_pageout_waiter);
3492 }
3493 #endif /* !CONFIG_EMBEDDED */
3494
3495 lck_mtx_unlock(&vm_page_queue_free_lock);
3496 vm_page_unlock_queues();
3497
3498 counter(c_vm_pageout_block++);
3499 thread_block((thread_continue_t)vm_pageout_continue);
3500 /*NOTREACHED*/
3501 }
3502
3503 #if !CONFIG_EMBEDDED
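/*
 * vm_pageout_wait:
 *
 * Block until the in-progress vm_pageout_scan() pass completes
 * or the given deadline expires; returns KERN_OPERATION_TIMED_OUT
 * if the deadline is reached while the scan is still running.
 */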
3504 kern_return_t
3505 vm_pageout_wait(uint64_t deadline)
3506 {
3507 kern_return_t kr;
3508
3509 lck_mtx_lock(&vm_page_queue_free_lock);
3510 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3511 vm_pageout_waiter = TRUE;
3512 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3513 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3514 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3515 kr = KERN_OPERATION_TIMED_OUT;
3516 }
3517 }
3518 lck_mtx_unlock(&vm_page_queue_free_lock);
3519
3520 return (kr);
3521 }
3522 #endif /* !CONFIG_EMBEDDED */
3523
3524
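/*
 * vm_pageout_iothread_external_continue:
 *
 * Continuation for the external pageout thread: drain the external
 * pageout queue, re-looking up each page under its object lock, and
 * push the data to the backing pager via memory_object_data_return(),
 * throttling between requests.  When the queue is empty, block with
 * this routine as the continuation.
 */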
3525 static void
3526 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3527 {
3528 vm_page_t m = NULL;
3529 vm_object_t object;
3530 vm_object_offset_t offset;
3531 memory_object_t pager;
3532
3533 /* On systems without a compressor, the external IO thread clears its
3534 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3535 * creation)
3536 */
3537 if (vm_pageout_internal_iothread != THREAD_NULL)
3538 current_thread()->options &= ~TH_OPT_VMPRIV;
3539
3540 vm_page_lockspin_queues();
3541
3542 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3543
3544 q->pgo_busy = TRUE;
3545 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3546
3547 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3548 VM_PAGE_CHECK(m);
3549 /*
3550 * grab a snapshot of the object and offset this
3551 * page is tabled in so that we can relookup this
3552 * page after we've taken the object lock - these
3553 * fields are stable while we hold the page queues lock
3554 * but as soon as we drop it, there is nothing to keep
3555 * this page in this object... we hold an activity_in_progress
3556 * on this object which will keep it from terminating
3557 */
3558 object = VM_PAGE_OBJECT(m);
3559 offset = m->offset;
3560
3561 if (object->object_slid) {
3562 panic("slid page %p not allowed on this path\n", m);
3563 }
3564 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3565 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3566
3567 vm_page_unlock_queues();
3568
3569 vm_object_lock(object);
3570
3571 m = vm_page_lookup(object, offset);
3572
3573 if (m == NULL ||
3574 m->busy || m->cleaning || !m->laundry || (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
3575 /*
3576 * it's either the same page that someone else has
3577 * started cleaning (or it's finished cleaning or
3578 * been put back on the pageout queue), or
3579 * the page has been freed or we have found a
3580 * new page at this offset... in all of these cases
3581 * we merely need to release the activity_in_progress
3582 * we took when we put the page on the pageout queue
3583 */
3584 vm_object_activity_end(object);
3585 vm_object_unlock(object);
3586
3587 vm_page_lockspin_queues();
3588 continue;
3589 }
3590 pager = object->pager;
3591
3592 if (pager == MEMORY_OBJECT_NULL) {
3593 /*
3594 * This pager has been destroyed by either
3595 * memory_object_destroy or vm_object_destroy, and
3596 * so there is nowhere for the page to go.
3597 */
3598 if (m->free_when_done) {
3599 /*
3600 * Just free the page... VM_PAGE_FREE takes
3601 * care of cleaning up all the state...
3602 * including doing the vm_pageout_throttle_up
3603 */
3604 VM_PAGE_FREE(m);
3605 } else {
3606 vm_page_lockspin_queues();
3607
3608 vm_pageout_throttle_up(m);
3609 vm_page_activate(m);
3610
3611 vm_page_unlock_queues();
3612
3613 /*
3614 * And we are done with it.
3615 */
3616 }
3617 vm_object_activity_end(object);
3618 vm_object_unlock(object);
3619
3620 vm_page_lockspin_queues();
3621 continue;
3622 }
3623 #if 0
3624 /*
3625 * we don't hold the page queue lock
3626 * so this check isn't safe to make
3627 */
3628 VM_PAGE_CHECK(m);
3629 #endif
3630 /*
3631 * give back the activity_in_progress reference we
3632 * took when we queued up this page and replace it
3633 * with a paging_in_progress reference that will
3634 * also keep the paging offset from changing and
3635 * prevent the object from terminating
3636 */
3637 vm_object_activity_end(object);
3638 vm_object_paging_begin(object);
3639 vm_object_unlock(object);
3640
3641 /*
3642 * Send the data to the pager.
3643 * any pageout clustering happens there
3644 */
3645 memory_object_data_return(pager,
3646 m->offset + object->paging_offset,
3647 PAGE_SIZE,
3648 NULL,
3649 NULL,
3650 FALSE,
3651 FALSE,
3652 0);
3653
3654 vm_object_lock(object);
3655 vm_object_paging_end(object);
3656 vm_object_unlock(object);
3657
3658 vm_pageout_io_throttle();
3659
3660 vm_page_lockspin_queues();
3661 }
3662 q->pgo_busy = FALSE;
3663 q->pgo_idle = TRUE;
3664
3665 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3666 vm_page_unlock_queues();
3667
3668 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3669 /*NOTREACHED*/
3670 }
3671
3672
3673 #define MAX_FREE_BATCH 32
3674 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3675 * this thread.
3676 */
3677
3678
3679 #if DEVELOPMENT || DEBUG
3680 uint64_t compressor_epoch_start, compressor_epoch_stop, compressor_threads_runtime;
3681 #endif
3682
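/*
 * vm_pageout_iothread_internal_continue:
 *
 * Continuation for each compressor thread: pull a batch of pages off
 * the internal pageout queue, compress them via
 * vm_pageout_compress_page(), and free the compressed pages in
 * MAX_FREE_BATCH sized chunks.  On non-jetsam configurations the
 * thread waits whenever the free list drops below
 * COMPRESSOR_FREE_RESERVED_LIMIT.
 */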
3683 void
3684 vm_pageout_iothread_internal_continue(struct cq *);
3685 void
3686 vm_pageout_iothread_internal_continue(struct cq *cq)
3687 {
3688 struct vm_pageout_queue *q;
3689 vm_page_t m = NULL;
3690 boolean_t pgo_draining;
3691 vm_page_t local_q;
3692 int local_cnt;
3693 vm_page_t local_freeq = NULL;
3694 int local_freed = 0;
3695 int local_batch_size;
3696 int ncomps = 0;
3697 #if DEVELOPMENT || DEBUG
3698 boolean_t marked_active = FALSE;
3699 #endif
3700 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3701
3702 q = cq->q;
3703 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
3704
3705 #if RECORD_THE_COMPRESSED_DATA
3706 if (q->pgo_laundry)
3707 c_compressed_record_init();
3708 #endif
3709 while (TRUE) {
3710 int pages_left_on_q = 0;
3711
3712 local_cnt = 0;
3713 local_q = NULL;
3714
3715 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3716
3717 vm_page_lock_queues();
3718 #if DEVELOPMENT || DEBUG
3719 if (marked_active == FALSE) {
3720 vmct_active++;
3721 vmct_state[cq->id] = VMCT_ACTIVE;
3722 marked_active = TRUE;
3723 if (vmct_active == 1) {
3724 compressor_epoch_start = mach_absolute_time();
3725 }
3726 }
3727 #endif
3728 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3729
3730 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3731
3732 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3733
3734 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3735 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3736 VM_PAGE_CHECK(m);
3737
3738 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3739 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3740 m->laundry = FALSE;
3741
3742 m->snext = local_q;
3743 local_q = m;
3744 local_cnt++;
3745 }
3746 if (local_q == NULL)
3747 break;
3748
3749 q->pgo_busy = TRUE;
3750
3751 if ((pgo_draining = q->pgo_draining) == FALSE) {
3752 vm_pageout_throttle_up_batch(q, local_cnt);
3753 pages_left_on_q = q->pgo_laundry;
3754 } else
3755 pages_left_on_q = q->pgo_laundry - local_cnt;
3756
3757 vm_page_unlock_queues();
3758
3759 #if !RECORD_THE_COMPRESSED_DATA
3760 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1)) {
3761 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3762 }
3763 #endif
3764 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3765
3766 while (local_q) {
3767
3768 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3769
3770 m = local_q;
3771 local_q = m->snext;
3772 m->snext = NULL;
3773
3774 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
3775 ncomps++;
3776 m->snext = local_freeq;
3777 local_freeq = m;
3778 local_freed++;
3779
3780 if (local_freed >= MAX_FREE_BATCH) {
3781 vm_pageout_freed_after_compression += local_freed;
3782
3783 vm_page_free_list(local_freeq, TRUE);
3784 local_freeq = NULL;
3785 local_freed = 0;
3786 }
3787 }
3788 #if !CONFIG_JETSAM
3789 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3790 kern_return_t wait_result;
3791 int need_wakeup = 0;
3792
3793 if (local_freeq) {
3794 vm_pageout_freed_after_compression += local_freed;
3795
3796 vm_page_free_list(local_freeq, TRUE);
3797 local_freeq = NULL;
3798 local_freed = 0;
3799
3800 continue;
3801 }
3802 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3803
3804 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3805
3806 if (vm_page_free_wanted_privileged++ == 0)
3807 need_wakeup = 1;
3808 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3809
3810 lck_mtx_unlock(&vm_page_queue_free_lock);
3811
3812 if (need_wakeup)
3813 thread_wakeup((event_t)&vm_page_free_wanted);
3814
3815 if (wait_result == THREAD_WAITING)
3816
3817 thread_block(THREAD_CONTINUE_NULL);
3818 } else
3819 lck_mtx_unlock(&vm_page_queue_free_lock);
3820 }
3821 #endif
3822 }
3823 if (local_freeq) {
3824 vm_pageout_freed_after_compression += local_freed;
3825
3826 vm_page_free_list(local_freeq, TRUE);
3827 local_freeq = NULL;
3828 local_freed = 0;
3829 }
3830 if (pgo_draining == TRUE) {
3831 vm_page_lockspin_queues();
3832 vm_pageout_throttle_up_batch(q, local_cnt);
3833 vm_page_unlock_queues();
3834 }
3835 }
3836 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3837
3838 /*
3839 * queue lock is held and our q is empty
3840 */
3841 q->pgo_busy = FALSE;
3842 q->pgo_idle = TRUE;
3843
3844 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3845 #if DEVELOPMENT || DEBUG
3846 if (marked_active == TRUE) {
3847 vmct_active--;
3848 vmct_state[cq->id] = VMCT_IDLE;
3849
3850 if (vmct_active == 0) {
3851 compressor_epoch_stop = mach_absolute_time();
3852 assert(compressor_epoch_stop > compressor_epoch_start);
3853 /* This interval includes intervals where one or more
3854 * compressor threads were pre-empted
3855 */
3856 vmct_stats.vmct_cthreads_total += compressor_epoch_stop - compressor_epoch_start;
3857 }
3858
3859 }
3860 #endif
3861 vm_page_unlock_queues();
3862 #if DEVELOPMENT || DEBUG
3863 if (__improbable(vm_compressor_time_thread)) {
3864 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3865 vmct_stats.vmct_pages[cq->id] += ncomps;
3866 vmct_stats.vmct_iterations[cq->id]++;
3867 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3868 vmct_stats.vmct_maxpages[cq->id] = ncomps;
3869 }
3870 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3871 vmct_stats.vmct_minpages[cq->id] = ncomps;
3872 }
3873 }
3874 #endif
3875
3876 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3877
3878 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3879 /*NOTREACHED*/
3880 }
3881
3882
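/*
 * vm_pageout_compress_page:
 *
 * Compress a single page into its object's compressor pager via
 * vm_compressor_pager_put(), creating the compressor pager on demand
 * if necessary.  On success the page is removed from its object; on
 * failure it is reactivated.  'object_locked_by_caller' indicates
 * whether the caller already holds the object lock.
 */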
3883 kern_return_t
3884 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
3885 {
3886 vm_object_t object;
3887 memory_object_t pager;
3888 int compressed_count_delta;
3889 kern_return_t retval;
3890
3891 object = VM_PAGE_OBJECT(m);
3892
3893 if (object->object_slid) {
3894 panic("slid page %p not allowed on this path\n", m);
3895 }
3896 assert(!m->free_when_done);
3897 assert(!m->laundry);
3898
3899 pager = object->pager;
3900
3901 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
3902
3903 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3904
3905 vm_object_lock(object);
3906
3907 /*
3908 * If there is no memory object for the page, create
3909 * one and hand it to the compression pager.
3910 */
3911
3912 if (!object->pager_initialized)
3913 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3914 if (!object->pager_initialized)
3915 vm_object_compressor_pager_create(object);
3916
3917 pager = object->pager;
3918
3919 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3920 /*
3921 * Still no pager for the object,
3922 * or the pager has been destroyed.
3923 * Reactivate the page.
3924 *
3925 * Should only happen if there is no
3926 * compression pager
3927 */
3928 PAGE_WAKEUP_DONE(m);
3929
3930 vm_page_lockspin_queues();
3931 vm_page_activate(m);
3932 vm_pageout_dirty_no_pager++;
3933 vm_page_unlock_queues();
3934
3935 /*
3936 * And we are done with it.
3937 */
3938 vm_object_activity_end(object);
3939 vm_object_unlock(object);
3940
3941 return KERN_FAILURE;
3942 }
3943 vm_object_unlock(object);
3944
3945 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3946 }
3947 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3948
3949 if (object_locked_by_caller == FALSE)
3950 assert(object->activity_in_progress > 0);
3951
3952 retval = vm_compressor_pager_put(
3953 pager,
3954 m->offset + object->paging_offset,
3955 VM_PAGE_GET_PHYS_PAGE(m),
3956 current_chead,
3957 scratch_buf,
3958 &compressed_count_delta);
3959
3960 if (object_locked_by_caller == FALSE) {
3961 vm_object_lock(object);
3962
3963 assert(object->activity_in_progress > 0);
3964 assert(VM_PAGE_OBJECT(m) == object);
3965 }
3966
3967 vm_compressor_pager_count(pager,
3968 compressed_count_delta,
3969 FALSE, /* shared_lock */
3970 object);
3971
3972 assert( !VM_PAGE_WIRED(m));
3973
3974 if (retval == KERN_SUCCESS) {
3975 /*
3976 * If the object is purgeable, its owner's
3977 * purgeable ledgers will be updated in
3978 * vm_page_remove() but the page still
3979 * contributes to the owner's memory footprint,
3980 * so account for it as such.
3981 */
3982 if (object->purgable != VM_PURGABLE_DENY &&
3983 object->vo_purgeable_owner != NULL) {
3984 /* one more compressed purgeable page */
3985 vm_purgeable_compressed_update(object,
3986 +1);
3987 }
3988 VM_STAT_INCR(compressions);
3989
3990 if (m->tabled)
3991 vm_page_remove(m, TRUE);
3992
3993 } else {
3994 PAGE_WAKEUP_DONE(m);
3995
3996 vm_page_lockspin_queues();
3997
3998 vm_page_activate(m);
3999 vm_compressor_failed++;
4000
4001 vm_page_unlock_queues();
4002 }
4003 if (object_locked_by_caller == FALSE) {
4004 vm_object_activity_end(object);
4005 vm_object_unlock(object);
4006 }
4007 return retval;
4008 }
4009
4010
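/*
 * vm_pageout_adjust_eq_iothrottle:
 *
 * Switch the external pageout thread between the throttled and
 * unthrottled I/O tiers (forced to unthrottled while hibernation
 * cleaning is in progress).  Called with the page queues locked;
 * the lock is dropped around the policy update.
 */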
4011 static void
4012 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
4013 {
4014 uint32_t policy;
4015
4016 if (hibernate_cleaning_in_progress == TRUE)
4017 req_lowpriority = FALSE;
4018
4019 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
4020
4021 vm_page_unlock_queues();
4022
4023 if (req_lowpriority == TRUE) {
4024 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
4025 DTRACE_VM(laundrythrottle);
4026 } else {
4027 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
4028 DTRACE_VM(laundryunthrottle);
4029 }
4030 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
4031 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4032
4033 eq->pgo_lowpriority = req_lowpriority;
4034
4035 vm_page_lock_queues();
4036 }
4037 }
4038
4039
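/*
 * vm_pageout_iothread_external:
 *
 * Entry point for the external pageout thread: mark the thread
 * VM-privileged, start it in the throttled I/O tier, record its
 * tid in the external pageout queue, then enter the continuation.
 */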
4040 static void
4041 vm_pageout_iothread_external(void)
4042 {
4043 thread_t self = current_thread();
4044
4045 self->options |= TH_OPT_VMPRIV;
4046
4047 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
4048
4049 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4050 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
4051
4052 vm_page_lock_queues();
4053
4054 vm_pageout_queue_external.pgo_tid = self->thread_id;
4055 vm_pageout_queue_external.pgo_lowpriority = TRUE;
4056 vm_pageout_queue_external.pgo_inited = TRUE;
4057
4058 vm_page_unlock_queues();
4059
4060 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
4061
4062 /*NOTREACHED*/
4063 }
4064
4065
4066 static void
4067 vm_pageout_iothread_internal(struct cq *cq)
4068 {
4069 thread_t self = current_thread();
4070
4071 self->options |= TH_OPT_VMPRIV;
4072
4073 vm_page_lock_queues();
4074
4075 vm_pageout_queue_internal.pgo_tid = self->thread_id;
4076 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
4077 vm_pageout_queue_internal.pgo_inited = TRUE;
4078
4079 vm_page_unlock_queues();
4080
4081 if (vm_restricted_to_single_processor == TRUE)
4082 thread_vm_bind_group_add();
4083
4084
4085 thread_set_thread_name(current_thread(), "VM_compressor");
4086 #if DEVELOPMENT || DEBUG
4087 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
4088 #endif
4089 vm_pageout_iothread_internal_continue(cq);
4090
4091 /*NOTREACHED*/
4092 }
4093
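/*
 * vm_set_buffer_cleanup_callout:
 *
 * Register the buffer-cache collection callout used by
 * vm_pageout_garbage_collect().  Only the first registration
 * succeeds; subsequent attempts return KERN_FAILURE.
 */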
4094 kern_return_t
4095 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4096 {
4097 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
4098 return KERN_SUCCESS;
4099 } else {
4100 return KERN_FAILURE; /* Already set */
4101 }
4102 }
4103
4104 extern boolean_t memorystatus_manual_testing_on;
4105 extern unsigned int memorystatus_level;
4106
4107
4108 #if VM_PRESSURE_EVENTS
4109
4110 boolean_t vm_pressure_events_enabled = FALSE;
4111
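/*
 * vm_pressure_response:
 *
 * Recompute memorystatus_level as the percentage of available
 * (non-compressed) memory and advance the pressure-level state
 * machine; on a transition, wake the pressure thread and any
 * threads waiting for a level change.
 */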
4112 void
4113 vm_pressure_response(void)
4114 {
4115
4116 vm_pressure_level_t old_level = kVMPressureNormal;
4117 int new_level = -1;
4118 unsigned int total_pages;
4119 uint64_t available_memory = 0;
4120
4121 if (vm_pressure_events_enabled == FALSE)
4122 return;
4123
4124 #if CONFIG_EMBEDDED
4125
4126 available_memory = (uint64_t) memorystatus_available_pages;
4127
4128 #else /* CONFIG_EMBEDDED */
4129
4130 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4131 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4132
4133 #endif /* CONFIG_EMBEDDED */
4134
4135 total_pages = (unsigned int) atop_64(max_mem);
4136 #if CONFIG_SECLUDED_MEMORY
4137 total_pages -= vm_page_secluded_count;
4138 #endif /* CONFIG_SECLUDED_MEMORY */
4139 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
4140
4141 if (memorystatus_manual_testing_on) {
4142 return;
4143 }
4144
4145 old_level = memorystatus_vm_pressure_level;
4146
4147 switch (memorystatus_vm_pressure_level) {
4148
4149 case kVMPressureNormal:
4150 {
4151 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4152 new_level = kVMPressureCritical;
4153 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4154 new_level = kVMPressureWarning;
4155 }
4156 break;
4157 }
4158
4159 case kVMPressureWarning:
4160 case kVMPressureUrgent:
4161 {
4162 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4163 new_level = kVMPressureNormal;
4164 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4165 new_level = kVMPressureCritical;
4166 }
4167 break;
4168 }
4169
4170 case kVMPressureCritical:
4171 {
4172 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4173 new_level = kVMPressureNormal;
4174 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4175 new_level = kVMPressureWarning;
4176 }
4177 break;
4178 }
4179
4180 default:
4181 return;
4182 }
4183
4184 if (new_level != -1) {
4185 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4186
4187 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
4188 if (vm_pressure_thread_running == FALSE) {
4189 thread_wakeup(&vm_pressure_thread);
4190 }
4191
4192 if (old_level != new_level) {
4193 thread_wakeup(&vm_pressure_changed);
4194 }
4195 }
4196 }
4197
4198 }
4199 #endif /* VM_PRESSURE_EVENTS */
4200
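/*
 * mach_vm_pressure_level_monitor:
 *
 * Return the current VM pressure level in *pressure_level; if
 * wait_for_pressure is TRUE, block (interruptibly) until the level
 * changes from the value passed in.  Not supported on embedded
 * configurations or when VM_PRESSURE_EVENTS is disabled.
 */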
4201 kern_return_t
4202 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4203
4204 #if CONFIG_EMBEDDED
4205
4206 return KERN_FAILURE;
4207
4208 #elif !VM_PRESSURE_EVENTS
4209
4210 return KERN_FAILURE;
4211
4212 #else /* VM_PRESSURE_EVENTS */
4213
4214 kern_return_t kr = KERN_SUCCESS;
4215
4216 if (pressure_level != NULL) {
4217
4218 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4219
4220 if (wait_for_pressure == TRUE) {
4221 wait_result_t wr = 0;
4222
4223 while (old_level == *pressure_level) {
4224 wr = assert_wait((event_t) &vm_pressure_changed,
4225 THREAD_INTERRUPTIBLE);
4226 if (wr == THREAD_WAITING) {
4227 wr = thread_block(THREAD_CONTINUE_NULL);
4228 }
4229 if (wr == THREAD_INTERRUPTED) {
4230 return KERN_ABORTED;
4231 }
4232 if (wr == THREAD_AWAKENED) {
4233
4234 old_level = memorystatus_vm_pressure_level;
4235
4236 if (old_level != *pressure_level) {
4237 break;
4238 }
4239 }
4240 }
4241 }
4242
4243 *pressure_level = old_level;
4244 kr = KERN_SUCCESS;
4245 } else {
4246 kr = KERN_INVALID_ARGUMENT;
4247 }
4248
4249 return kr;
4250 #endif /* VM_PRESSURE_EVENTS */
4251 }
4252
4253 #if VM_PRESSURE_EVENTS
4254 void
4255 vm_pressure_thread(void) {
4256 static boolean_t thread_initialized = FALSE;
4257
4258 if (thread_initialized == TRUE) {
4259 vm_pressure_thread_running = TRUE;
4260 consider_vm_pressure_events();
4261 vm_pressure_thread_running = FALSE;
4262 }
4263
4264 thread_initialized = TRUE;
4265 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4266 thread_block((thread_continue_t)vm_pressure_thread);
4267 }
4268 #endif /* VM_PRESSURE_EVENTS */
4269
4270
4271 uint32_t vm_pageout_considered_page_last = 0;
4272
4273 /*
4274 * called once per-second via "compute_averages"
4275 */
4276 void
4277 compute_pageout_gc_throttle(__unused void *arg)
4278 {
4279 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4280
4281 vm_pageout_considered_page_last = vm_pageout_considered_page;
4282
4283 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4284 }
4285 }
4286
4287 /*
4288 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4289 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4290 * jetsams. We need to check if the zone map size is above its jetsam limit to
4291 * decide if this was indeed the case.
4292 *
4293 * We need to do this on a different thread because of the following reasons:
4294 *
4295 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4296 * itself causing the system to hang. We perform synchronous jetsams if we're
4297 * leaking in the VM map entries zone, so the leaking process could be doing a
4298 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4299 * jetsam itself. We also need the vm_map lock on the process termination path,
4300 * which would now lead the dying process to deadlock against itself.
4301 *
4302 * 2. The jetsam path might need to allocate zone memory itself. We could try
4303 * using the non-blocking variant of zalloc for this path, but we can still
4304 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4305 * full.
4306 */
4307
4308 extern boolean_t is_zone_map_nearing_exhaustion(void);
4309
4310 void
4311 vm_pageout_garbage_collect(int collect)
4312 {
4313 if (collect) {
4314 if (is_zone_map_nearing_exhaustion()) {
4315 /*
4316 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4317 *
4318 * Bail out after calling zone_gc (which triggers the
4319 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4320 * operations that clear out a bunch of caches might allocate zone
4321 * memory themselves (e.g. vm_map operations would need VM map
4322 * entries). Since the zone map is almost full at this point, we
4323 * could end up with a panic. We just need to quickly jetsam a
4324 * process and exit here.
4325 *
4326 * It could so happen that we were woken up to relieve memory
4327 * pressure and the zone map also happened to be near its limit at
4328 * the time, in which case we'll skip out early. But that should be
4329 * ok; if memory pressure persists, the thread will simply be woken
4330 * up again.
4331 */
4332 consider_zone_gc(TRUE);
4333
4334 } else {
4335 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4336 boolean_t buf_large_zfree = FALSE;
4337 boolean_t first_try = TRUE;
4338
4339 stack_collect();
4340
4341 consider_machine_collect();
4342 m_drain();
4343
4344 do {
4345 if (consider_buffer_cache_collect != NULL) {
4346 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4347 }
4348 if (first_try == TRUE || buf_large_zfree == TRUE) {
4349 /*
4350 * consider_zone_gc should be last, because the other operations
4351 * might return memory to zones.
4352 */
4353 consider_zone_gc(FALSE);
4354 }
4355 first_try = FALSE;
4356
4357 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4358
4359 consider_machine_adjust();
4360 }
4361 }
4362
4363 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4364
4365 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4366 /*NOTREACHED*/
4367 }
4368
4369
4370 #if VM_PAGE_BUCKETS_CHECK
4371 #if VM_PAGE_FAKE_BUCKETS
4372 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4373 #endif /* VM_PAGE_FAKE_BUCKETS */
4374 #endif /* VM_PAGE_BUCKETS_CHECK */
4375
4376
4377
4378 void
4379 vm_set_restrictions()
4380 {
4381 host_basic_info_data_t hinfo;
4382 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4383
4384 #define BSD_HOST 1
4385 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4386
4387 assert(hinfo.max_cpus > 0);
4388
4389 if (hinfo.max_cpus <= 3) {
4390 /*
4391 * on systems with a limited number of CPUs, bind the
4392 * 4 major threads that can free memory and that tend to use
4393 * a fair bit of CPU under pressured conditions to a single processor.
4394 * This ensures that these threads don't hog all of the available CPUs
4395 * (important for camera launch), while allowing them to run independently
4396 * with respect to locks... the 4 threads are
4397 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4398 * vm_compressor_swap_trigger_thread (minor and major compactions),
4399 * memorystatus_thread (jetsams).
4400 *
4401 * the first time the thread is run, it is responsible for checking the
4402 * state of vm_restricted_to_single_processor, and if TRUE it calls
4403 * thread_bind_master... someday this should be replaced with a group
4404 * scheduling mechanism and KPI.
4405 */
4406 vm_restricted_to_single_processor = TRUE;
4407 }
4408 }
4409
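/*
 * vm_pageout:
 *
 * Bootstrap for the pageout subsystem: set up the daemon thread's
 * priority and privileges, initialize the pageout tunables and the
 * internal/external pageout queues, start the external iothread,
 * garbage collector and pressure threads, derive vm_config from
 * vm_compressor_mode, and finally enter vm_pageout_continue(),
 * which never returns.
 */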
4410 void
4411 vm_pageout(void)
4412 {
4413 thread_t self = current_thread();
4414 thread_t thread;
4415 kern_return_t result;
4416 spl_t s;
4417
4418 /*
4419 * Set thread privileges.
4420 */
4421 s = splsched();
4422
4423 thread_lock(self);
4424 self->options |= TH_OPT_VMPRIV;
4425 sched_set_thread_base_priority(self, BASEPRI_VM);
4426 thread_unlock(self);
4427
4428 if (!self->reserved_stack)
4429 self->reserved_stack = self->kernel_stack;
4430
4431 if (vm_restricted_to_single_processor == TRUE)
4432 thread_vm_bind_group_add();
4433
4434 splx(s);
4435
4436 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4437
4438 /*
4439 * Initialize some paging parameters.
4440 */
4441
4442 if (vm_pageout_swap_wait == 0)
4443 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4444
4445 if (vm_pageout_idle_wait == 0)
4446 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4447
4448 if (vm_pageout_burst_wait == 0)
4449 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4450
4451 if (vm_pageout_empty_wait == 0)
4452 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4453
4454 if (vm_pageout_deadlock_wait == 0)
4455 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4456
4457 if (vm_pageout_deadlock_relief == 0)
4458 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4459
4460 if (vm_pageout_inactive_relief == 0)
4461 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4462
4463 if (vm_pageout_burst_active_throttle == 0)
4464 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4465
4466 if (vm_pageout_burst_inactive_throttle == 0)
4467 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4468
4469 /*
4470 * Set kernel task to low backing store privileged
4471 * status
4472 */
4473 task_lock(kernel_task);
4474 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4475 task_unlock(kernel_task);
4476
4477 vm_page_free_count_init = vm_page_free_count;
4478
4479 /*
4480 * even if we've already called vm_page_free_reserve,
4481 * call it again here to ensure that the targets are
4482 * accurately calculated (it uses vm_page_free_count_init)...
4483 * calling it with an arg of 0 will not change the reserve
4484 * but will re-calculate free_min and free_target
4485 */
4486 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4487 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4488 } else
4489 vm_page_free_reserve(0);
4490
4491
4492 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4493 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4494 vm_pageout_queue_external.pgo_laundry = 0;
4495 vm_pageout_queue_external.pgo_idle = FALSE;
4496 vm_pageout_queue_external.pgo_busy = FALSE;
4497 vm_pageout_queue_external.pgo_throttled = FALSE;
4498 vm_pageout_queue_external.pgo_draining = FALSE;
4499 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4500 vm_pageout_queue_external.pgo_tid = -1;
4501 vm_pageout_queue_external.pgo_inited = FALSE;
4502
4503 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4504 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4505 vm_pageout_queue_internal.pgo_laundry = 0;
4506 vm_pageout_queue_internal.pgo_idle = FALSE;
4507 vm_pageout_queue_internal.pgo_busy = FALSE;
4508 vm_pageout_queue_internal.pgo_throttled = FALSE;
4509 vm_pageout_queue_internal.pgo_draining = FALSE;
4510 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4511 vm_pageout_queue_internal.pgo_tid = -1;
4512 vm_pageout_queue_internal.pgo_inited = FALSE;
4513
4514 /* internal pageout thread started when default pager registered first time */
4515 /* external pageout and garbage collection threads started here */
4516
4517 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4518 BASEPRI_VM,
4519 &vm_pageout_external_iothread);
4520 if (result != KERN_SUCCESS)
4521 panic("vm_pageout_iothread_external: create failed");
4522
4523 thread_deallocate(vm_pageout_external_iothread);
4524
4525 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4526 BASEPRI_DEFAULT,
4527 &thread);
4528 if (result != KERN_SUCCESS)
4529 panic("vm_pageout_garbage_collect: create failed");
4530
4531 thread_deallocate(thread);
4532
4533 #if VM_PRESSURE_EVENTS
4534 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4535 BASEPRI_DEFAULT,
4536 &thread);
4537
4538 if (result != KERN_SUCCESS)
4539 panic("vm_pressure_thread: create failed");
4540
4541 thread_deallocate(thread);
4542 #endif
4543
4544 vm_object_reaper_init();
4545
4546
4547 bzero(&vm_config, sizeof(vm_config));
4548
4549 switch(vm_compressor_mode) {
4550
4551 case VM_PAGER_DEFAULT:
4552 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4553
4554 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4555 vm_config.compressor_is_present = TRUE;
4556 vm_config.swap_is_present = TRUE;
4557 vm_config.compressor_is_active = TRUE;
4558 vm_config.swap_is_active = TRUE;
4559 break;
4560
4561 case VM_PAGER_COMPRESSOR_NO_SWAP:
4562 vm_config.compressor_is_present = TRUE;
4563 vm_config.swap_is_present = TRUE;
4564 vm_config.compressor_is_active = TRUE;
4565 break;
4566
4567 case VM_PAGER_FREEZER_DEFAULT:
4568 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4569
4570 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4571 vm_config.compressor_is_present = TRUE;
4572 vm_config.swap_is_present = TRUE;
4573 break;
4574
4575 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4576 vm_config.compressor_is_present = TRUE;
4577 vm_config.swap_is_present = TRUE;
4578 vm_config.compressor_is_active = TRUE;
4579 vm_config.freezer_swap_is_active = TRUE;
4580 break;
4581
4582 case VM_PAGER_NOT_CONFIGURED:
4583 break;
4584
4585 default:
4586 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4587 break;
4588 }
4589 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4590 vm_compressor_pager_init();
4591
4592 #if VM_PRESSURE_EVENTS
4593 vm_pressure_events_enabled = TRUE;
4594 #endif /* VM_PRESSURE_EVENTS */
4595
4596 #if CONFIG_PHANTOM_CACHE
4597 vm_phantom_cache_init();
4598 #endif
4599 #if VM_PAGE_BUCKETS_CHECK
4600 #if VM_PAGE_FAKE_BUCKETS
4601 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4602 (uint64_t) vm_page_fake_buckets_start,
4603 (uint64_t) vm_page_fake_buckets_end);
4604 pmap_protect(kernel_pmap,
4605 vm_page_fake_buckets_start,
4606 vm_page_fake_buckets_end,
4607 VM_PROT_READ);
4608 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4609 #endif /* VM_PAGE_FAKE_BUCKETS */
4610 #endif /* VM_PAGE_BUCKETS_CHECK */
4611
4612 #if VM_OBJECT_TRACKING
4613 vm_object_tracking_init();
4614 #endif /* VM_OBJECT_TRACKING */
4615
4616 vm_tests();
4617
4618 vm_pageout_continue();
4619
4620 /*
4621 * Unreached code!
4622 *
4623 * The vm_pageout_continue() call above never returns, so the code below is never
4624 * executed. We take advantage of this to declare several DTrace VM related probe
4625 * points that our kernel doesn't have an analog for. These are probe points that
4626 * exist in Solaris and are in the DTrace documentation, so people may have written
4627 * scripts that use them. Declaring the probe points here means their scripts will
4628 * compile and execute which we want for portability of the scripts, but since this
4629 * section of code is never reached, the probe points will simply never fire. Yes,
4630 * this is basically a hack. The problem is the DTrace probe points were chosen with
4631 * Solaris specific VM events in mind, not portability to different VM implementations.
4632 */
4633
4634 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4635 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4636 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4637 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4638 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4639 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4640 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4641 /*NOTREACHED*/
4642 }
4643
4644
4645
4646 #if CONFIG_EMBEDDED
4647 int vm_compressor_thread_count = 1;
4648 #else
4649 int vm_compressor_thread_count = 2;
4650 #endif
4651
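/*
 * vm_pageout_internal_start:
 *
 * Size the compressor thread pool (overridable via the
 * "vmcomp_threads" and "vmpgoi_maxlaundry" boot-args, clamped to
 * [1, MAX_COMPRESSOR_THREAD_COUNT] and below the CPU count) and
 * start one vm_pageout_iothread_internal thread per slot, each
 * with its own scratch buffer.
 */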
4652 kern_return_t
4653 vm_pageout_internal_start(void)
4654 {
4655 kern_return_t result;
4656 int i;
4657 host_basic_info_data_t hinfo;
4658
4659 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4660
4661 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4662 #define BSD_HOST 1
4663 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4664
4665 assert(hinfo.max_cpus > 0);
4666
4667 PE_parse_boot_argn("vmcomp_threads", &vm_compressor_thread_count, sizeof(vm_compressor_thread_count));
4668 if (vm_compressor_thread_count >= hinfo.max_cpus)
4669 vm_compressor_thread_count = hinfo.max_cpus - 1;
4670 if (vm_compressor_thread_count <= 0)
4671 vm_compressor_thread_count = 1;
4672 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4673 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4674
4675 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4676
4677 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4678
4679 for (i = 0; i < vm_compressor_thread_count; i++) {
4680 ciq[i].id = i;
4681 ciq[i].q = &vm_pageout_queue_internal;
4682 ciq[i].current_chead = NULL;
4683 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4684
4685 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_VM, &vm_pageout_internal_iothread);
4686
4687 if (result == KERN_SUCCESS)
4688 thread_deallocate(vm_pageout_internal_iothread);
4689 else
4690 break;
4691 }
4692 return result;
4693 }
4694
4695 #if CONFIG_IOSCHED
4696 /*
4697 * To support I/O Expedite for compressed files we mark the upls with special flags.
4698 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4699 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4700 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4701 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4702 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4703 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4704 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4705 * unless the real I/O upl is being destroyed).
4706 */
4707
4708
4709 static void
4710 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4711 {
4712 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4713
4714 upl_lock(src_upl);
4715 if (src_upl->decmp_io_upl) {
4716 /*
4717 * If there is already an alive real I/O UPL, ignore this new UPL.
4718 * This case should rarely happen and even if it does, it just means
4719 * that we might issue a spurious expedite which the driver is expected
4720 * to handle.
4721 */
4722 upl_unlock(src_upl);
4723 return;
4724 }
4725 src_upl->decmp_io_upl = (void *)upl;
4726 src_upl->ref_count++;
4727
4728 upl->flags |= UPL_DECMP_REAL_IO;
4729 upl->decmp_io_upl = (void *)src_upl;
4730 upl_unlock(src_upl);
4731 }
4732 #endif /* CONFIG_IOSCHED */
4733
4734 #if UPL_DEBUG
4735 int upl_debug_enabled = 1;
4736 #else
4737 int upl_debug_enabled = 0;
4738 #endif
4739
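/*
 * upl_create:
 *
 * Allocate and initialize a UPL.  UPL_CREATE_LITE appends a bitmap
 * with one bit per page, UPL_CREATE_INTERNAL appends a
 * upl_page_info array, and UPL_CREATE_IO_TRACKING /
 * UPL_CREATE_EXPEDITE_SUP wire up the I/O scheduling and decmpfs
 * expedite bookkeeping.
 */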
4740 static upl_t
4741 upl_create(int type, int flags, upl_size_t size)
4742 {
4743 upl_t upl;
4744 vm_size_t page_field_size = 0;
4745 int upl_flags = 0;
4746 vm_size_t upl_size = sizeof(struct upl);
4747
4748 size = round_page_32(size);
4749
4750 if (type & UPL_CREATE_LITE) {
4751 page_field_size = (atop(size) + 7) >> 3;
4752 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4753
4754 upl_flags |= UPL_LITE;
4755 }
4756 if (type & UPL_CREATE_INTERNAL) {
4757 upl_size += sizeof(struct upl_page_info) * atop(size);
4758
4759 upl_flags |= UPL_INTERNAL;
4760 }
4761 upl = (upl_t)kalloc(upl_size + page_field_size);
4762
4763 if (page_field_size)
4764 bzero((char *)upl + upl_size, page_field_size);
4765
4766 upl->flags = upl_flags | flags;
4767 upl->kaddr = (vm_offset_t)0;
4768 upl->size = 0;
4769 upl->map_object = NULL;
4770 upl->ref_count = 1;
4771 upl->ext_ref_count = 0;
4772 upl->highest_page = 0;
4773 upl_lock_init(upl);
4774 upl->vector_upl = NULL;
4775 upl->associated_upl = NULL;
4776 #if CONFIG_IOSCHED
4777 if (type & UPL_CREATE_IO_TRACKING) {
4778 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4779 }
4780
4781 upl->upl_reprio_info = 0;
4782 upl->decmp_io_upl = 0;
4783 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4784 /* Only support expedite on internal UPLs */
4785 thread_t curthread = current_thread();
4786 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4787 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4788 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4789 if (curthread->decmp_upl != NULL)
4790 upl_set_decmp_info(upl, curthread->decmp_upl);
4791 }
4792 #endif
4793 #if CONFIG_IOSCHED || UPL_DEBUG
4794 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4795 upl->upl_creator = current_thread();
4796 upl->uplq.next = 0;
4797 upl->uplq.prev = 0;
4798 upl->flags |= UPL_TRACKED_BY_OBJECT;
4799 }
4800 #endif
4801
4802 #if UPL_DEBUG
4803 upl->ubc_alias1 = 0;
4804 upl->ubc_alias2 = 0;
4805
4806 upl->upl_state = 0;
4807 upl->upl_commit_index = 0;
4808 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4809
4810 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4811 #endif /* UPL_DEBUG */
4812
4813 return(upl);
4814 }
4815
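/*
 * upl_destroy:
 *
 * Tear down a UPL once its last reference is gone: detach any
 * decmpfs real-I/O link, remove it from its object's UPL queue if
 * it was tracked, drop the shadow object reference if one was
 * taken, and free the upl structure along with its page list and
 * lite-list storage.
 */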
4816 static void
4817 upl_destroy(upl_t upl)
4818 {
4819 int page_field_size; /* bit field in word size buf */
4820 int size;
4821
4822 if (upl->ext_ref_count) {
4823 panic("upl(%p) ext_ref_count", upl);
4824 }
4825
4826 #if CONFIG_IOSCHED
4827 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4828 upl_t src_upl;
4829 src_upl = upl->decmp_io_upl;
4830 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4831 upl_lock(src_upl);
4832 src_upl->decmp_io_upl = NULL;
4833 upl_unlock(src_upl);
4834 upl_deallocate(src_upl);
4835 }
4836 #endif /* CONFIG_IOSCHED */
4837
4838 #if CONFIG_IOSCHED || UPL_DEBUG
4839 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4840 vm_object_t object;
4841
4842 if (upl->flags & UPL_SHADOWED) {
4843 object = upl->map_object->shadow;
4844 } else {
4845 object = upl->map_object;
4846 }
4847
4848 vm_object_lock(object);
4849 queue_remove(&object->uplq, upl, upl_t, uplq);
4850 vm_object_activity_end(object);
4851 vm_object_collapse(object, 0, TRUE);
4852 vm_object_unlock(object);
4853 }
4854 #endif
4855 /*
4856 * drop a reference on the map_object whether or
4857 * not a pageout object is inserted
4858 */
4859 if (upl->flags & UPL_SHADOWED)
4860 vm_object_deallocate(upl->map_object);
4861
4862 if (upl->flags & UPL_DEVICE_MEMORY)
4863 size = PAGE_SIZE;
4864 else
4865 size = upl->size;
4866 page_field_size = 0;
4867
4868 if (upl->flags & UPL_LITE) {
4869 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4870 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4871 }
4872 upl_lock_destroy(upl);
4873 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4874
4875 #if CONFIG_IOSCHED
4876 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4877 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4878 #endif
4879
4880 if (upl->flags & UPL_INTERNAL) {
4881 kfree(upl,
4882 sizeof(struct upl) +
4883 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4884 + page_field_size);
4885 } else {
4886 kfree(upl, sizeof(struct upl) + page_field_size);
4887 }
4888 }
4889
4890 void
4891 upl_deallocate(upl_t upl)
4892 {
4893 upl_lock(upl);
4894 if (--upl->ref_count == 0) {
4895 if(vector_upl_is_valid(upl))
4896 vector_upl_deallocate(upl);
4897 upl_unlock(upl);
4898 upl_destroy(upl);
4899 }
4900 else
4901 upl_unlock(upl);
4902 }
4903
4904 #if CONFIG_IOSCHED
4905 void
4906 upl_mark_decmp(upl_t upl)
4907 {
4908 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4909 upl->flags |= UPL_DECMP_REQ;
4910 upl->upl_creator->decmp_upl = (void *)upl;
4911 }
4912 }
4913
4914 void
4915 upl_unmark_decmp(upl_t upl)
4916 {
4917 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4918 upl->upl_creator->decmp_upl = NULL;
4919 }
4920 }
4921
4922 #endif /* CONFIG_IOSCHED */
4923
4924 #define VM_PAGE_Q_BACKING_UP(q) \
4925 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4926
4927 boolean_t must_throttle_writes(void);
4928
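/*
 * must_throttle_writes:
 *
 * Returns TRUE when the external pageout queue is backing up
 * (laundry at or above 80% of pgo_maxlaundry, per
 * VM_PAGE_Q_BACKING_UP) and pageable external pages exceed 60% of
 * the available non-compressed memory; used to delay new
 * file-backed writes under pressure.
 */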
4929 boolean_t
4930 must_throttle_writes()
4931 {
4932 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4933 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4934 return (TRUE);
4935
4936 return (FALSE);
4937 }
4938
4939
4940 #if DEVELOPMENT || DEBUG
4941 /*
4942 * Statistics about UPL enforcement of copy-on-write obligations.
4943 */
4944 unsigned long upl_cow = 0;
4945 unsigned long upl_cow_again = 0;
4946 unsigned long upl_cow_pages = 0;
4947 unsigned long upl_cow_again_pages = 0;
4948
4949 unsigned long iopl_cow = 0;
4950 unsigned long iopl_cow_pages = 0;
4951 #endif
4952
4953 /*
4954 * Routine: vm_object_upl_request
4955 * Purpose:
4956 * Cause the population of a portion of a vm_object.
4957 * Depending on the nature of the request, the pages
4958 * returned may contain valid data or be uninitialized.
4959 * A page list structure, listing the physical pages,
4960 * will be returned upon request.
4961 * This function is called by the file system or any other
4962 * supplier of backing store to a pager.
4963 * IMPORTANT NOTE: The caller must still respect the relationship
4964 * between the vm_object and its backing memory object. The
4965 * caller MUST NOT substitute changes in the backing file
4966 * without first doing a memory_object_lock_request on the
4967 * target range unless it is know that the pages are not
4968 * shared with another entity at the pager level.
4969 * Copy_in_to:
4970 * if a page list structure is present
4971 * return the mapped physical pages; where a
4972 * page is not present, return a non-initialized
4973 * one. If the no_sync bit is turned on, don't
4974 * call the pager unlock to synchronize with other
4975 * possible copies of the page. Leave pages busy
4976 * in the original object, if a page list structure
4977 * was specified. When a commit of the page list
4978 * pages is done, the dirty bit will be set for each one.
4979 * Copy_out_from:
4980 * If a page list structure is present, return
4981 * all mapped pages. Where a page does not exist
4982 * map a zero filled one. Leave pages busy in
4983 * the original object. If a page list structure
4984 * is not specified, this call is a no-op.
4985 *
4986 * Note: access of default pager objects has a rather interesting
4987 * twist. The caller of this routine, presumably the file system
4988 * page cache handling code, will never actually make a request
4989 * against a default pager backed object. Only the default
4990 * pager will make requests on backing store related vm_objects
4991 * In this way the default pager can maintain the relationship
4992 * between backing store files (abstract memory objects) and
4993 * the vm_objects (cache objects), they support.
4994 *
4995 */
4996
4997 __private_extern__ kern_return_t
4998 vm_object_upl_request(
4999 vm_object_t object,
5000 vm_object_offset_t offset,
5001 upl_size_t size,
5002 upl_t *upl_ptr,
5003 upl_page_info_array_t user_page_list,
5004 unsigned int *page_list_count,
5005 upl_control_flags_t cntrl_flags,
5006 vm_tag_t tag)
5007 {
5008 vm_page_t dst_page = VM_PAGE_NULL;
5009 vm_object_offset_t dst_offset;
5010 upl_size_t xfer_size;
5011 unsigned int size_in_pages;
5012 boolean_t dirty;
5013 boolean_t hw_dirty;
5014 upl_t upl = NULL;
5015 unsigned int entry;
5016 #if MACH_CLUSTER_STATS
5017 boolean_t encountered_lrp = FALSE;
5018 #endif
5019 vm_page_t alias_page = NULL;
5020 int refmod_state = 0;
5021 wpl_array_t lite_list = NULL;
5022 vm_object_t last_copy_object;
5023 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5024 struct vm_page_delayed_work *dwp;
5025 int dw_count;
5026 int dw_limit;
5027 int io_tracking_flag = 0;
5028 int grab_options;
5029 ppnum_t phys_page;
5030
5031 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5032 /*
5033 * For forward compatibility's sake,
5034 * reject any unknown flag.
5035 */
5036 return KERN_INVALID_VALUE;
5037 }
5038 if ( (!object->internal) && (object->paging_offset != 0) )
5039 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5040 if (object->phys_contiguous)
5041 panic("vm_object_upl_request: contiguous object specified\n");
5042
5043
5044 if (size > MAX_UPL_SIZE_BYTES)
5045 size = MAX_UPL_SIZE_BYTES;
5046
5047 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
5048 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5049
5050 #if CONFIG_IOSCHED || UPL_DEBUG
5051 if (object->io_tracking || upl_debug_enabled)
5052 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5053 #endif
5054 #if CONFIG_IOSCHED
5055 if (object->io_tracking)
5056 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5057 #endif
5058
5059 if (cntrl_flags & UPL_SET_INTERNAL) {
5060 if (cntrl_flags & UPL_SET_LITE) {
5061
5062 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5063
5064 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5065 lite_list = (wpl_array_t)
5066 (((uintptr_t)user_page_list) +
5067 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5068 if (size == 0) {
5069 user_page_list = NULL;
5070 lite_list = NULL;
5071 }
5072 } else {
5073 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5074
5075 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5076 if (size == 0) {
5077 user_page_list = NULL;
5078 }
5079 }
5080 } else {
5081 if (cntrl_flags & UPL_SET_LITE) {
5082
5083 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5084
5085 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5086 if (size == 0) {
5087 lite_list = NULL;
5088 }
5089 } else {
5090 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5091 }
5092 }
5093 *upl_ptr = upl;
5094
5095 if (user_page_list)
5096 user_page_list[0].device = FALSE;
5097
5098 if (cntrl_flags & UPL_SET_LITE) {
5099 upl->map_object = object;
5100 } else {
5101 upl->map_object = vm_object_allocate(size);
5102 /*
5103 * No need to lock the new object: nobody else knows
5104 * about it yet, so it's all ours so far.
5105 */
5106 upl->map_object->shadow = object;
5107 upl->map_object->pageout = TRUE;
5108 upl->map_object->can_persist = FALSE;
5109 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5110 upl->map_object->vo_shadow_offset = offset;
5111 upl->map_object->wimg_bits = object->wimg_bits;
5112
5113 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5114
5115 upl->flags |= UPL_SHADOWED;
5116 }
5117 if (cntrl_flags & UPL_FOR_PAGEOUT)
5118 upl->flags |= UPL_PAGEOUT;
5119
5120 vm_object_lock(object);
5121 vm_object_activity_begin(object);
5122
5123 grab_options = 0;
5124 #if CONFIG_SECLUDED_MEMORY
5125 if (object->can_grab_secluded) {
5126 grab_options |= VM_PAGE_GRAB_SECLUDED;
5127 }
5128 #endif /* CONFIG_SECLUDED_MEMORY */
5129
5130 /*
5131 * we can lock in the paging_offset once paging_in_progress is set
5132 */
5133 upl->size = size;
5134 upl->offset = offset + object->paging_offset;
5135
5136 #if CONFIG_IOSCHED || UPL_DEBUG
5137 if (object->io_tracking || upl_debug_enabled) {
5138 vm_object_activity_begin(object);
5139 queue_enter(&object->uplq, upl, upl_t, uplq);
5140 }
5141 #endif
5142 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5143 /*
5144 * Honor copy-on-write obligations
5145 *
5146 * The caller is gathering these pages and
5147 * might modify their contents. We need to
5148 * make sure that the copy object has its own
5149 * private copies of these pages before we let
5150 * the caller modify them.
5151 */
5152 vm_object_update(object,
5153 offset,
5154 size,
5155 NULL,
5156 NULL,
5157 FALSE, /* should_return */
5158 MEMORY_OBJECT_COPY_SYNC,
5159 VM_PROT_NO_CHANGE);
5160 #if DEVELOPMENT || DEBUG
5161 upl_cow++;
5162 upl_cow_pages += size >> PAGE_SHIFT;
5163 #endif
5164 }
5165 /*
5166 * remember which copy object we synchronized with
5167 */
5168 last_copy_object = object->copy;
5169 entry = 0;
5170
5171 xfer_size = size;
5172 dst_offset = offset;
5173 size_in_pages = size / PAGE_SIZE;
5174
5175 dwp = &dw_array[0];
5176 dw_count = 0;
5177 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5178
5179 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5180 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5181 object->scan_collisions = 0;
5182
5183 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5184 boolean_t isSSD = FALSE;
5185
5186 #if CONFIG_EMBEDDED
5187 isSSD = TRUE;
5188 #else
5189 vnode_pager_get_isSSD(object->pager, &isSSD);
5190 #endif
5191 vm_object_unlock(object);
5192
5193 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5194
5195 if (isSSD == TRUE)
5196 delay(1000 * size_in_pages);
5197 else
5198 delay(5000 * size_in_pages);
5199 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5200
5201 vm_object_lock(object);
5202 }
5203
5204 while (xfer_size) {
5205
5206 dwp->dw_mask = 0;
5207
5208 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5209 vm_object_unlock(object);
5210 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5211 vm_object_lock(object);
5212 }
5213 if (cntrl_flags & UPL_COPYOUT_FROM) {
5214 upl->flags |= UPL_PAGE_SYNC_DONE;
5215
5216 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5217 dst_page->fictitious ||
5218 dst_page->absent ||
5219 dst_page->error ||
5220 dst_page->cleaning ||
5221 (VM_PAGE_WIRED(dst_page))) {
5222
5223 if (user_page_list)
5224 user_page_list[entry].phys_addr = 0;
5225
5226 goto try_next_page;
5227 }
5228 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5229
5230 /*
5231 * grab this up front...
5232 * a high percentage of the time we're going to
5233 * need the hardware modification state a bit later
5234 * anyway... so we can eliminate an extra call into
5235 * the pmap layer by grabbing it here and recording it
5236 */
5237 if (dst_page->pmapped)
5238 refmod_state = pmap_get_refmod(phys_page);
5239 else
5240 refmod_state = 0;
5241
5242 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5243 /*
5244 * page is on inactive list and referenced...
5245 * reactivate it now... this gets it out of the
5246 * way of vm_pageout_scan which would have to
5247 * reactivate it upon tripping over it
5248 */
5249 dwp->dw_mask |= DW_vm_page_activate;
5250 }
5251 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5252 /*
5253 * we're only asking for DIRTY pages to be returned
5254 */
5255 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5256 /*
5257 * if we were the page stolen by vm_pageout_scan to be
5258 * cleaned (as opposed to a buddy being clustered in),
5259 * or this request is not being driven by a PAGEOUT cluster,
5260 * then we only need to check for the page being dirty or
5261 * precious to decide whether to return it
5262 */
5263 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
5264 goto check_busy;
5265 goto dont_return;
5266 }
5267 /*
5268 * this is a request for a PAGEOUT cluster and this page
5269 * is merely along for the ride as a 'buddy'... not only
5270 * does it have to be dirty to be returned, but it also
5271 * can't have been referenced recently...
5272 */
5273 if ( (hibernate_cleaning_in_progress == TRUE ||
5274 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) ||
5275 (dst_page->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5276 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
5277 goto check_busy;
5278 }
5279 dont_return:
5280 /*
5281 * if we reach here, we're not to return
5282 * the page... go on to the next one
5283 */
5284 if (dst_page->laundry == TRUE) {
5285 /*
5286 * if we get here, the page is not 'cleaning' (filtered out above).
5287 * since it has been referenced, remove it from the laundry
5288 * so we don't pay the cost of an I/O to clean a page
5289 * we're just going to take back
5290 */
5291 vm_page_lockspin_queues();
5292
5293 vm_pageout_steal_laundry(dst_page, TRUE);
5294 vm_page_activate(dst_page);
5295
5296 vm_page_unlock_queues();
5297 }
5298 if (user_page_list)
5299 user_page_list[entry].phys_addr = 0;
5300
5301 goto try_next_page;
5302 }
5303 check_busy:
5304 if (dst_page->busy) {
5305 if (cntrl_flags & UPL_NOBLOCK) {
5306 if (user_page_list)
5307 user_page_list[entry].phys_addr = 0;
5308 dwp->dw_mask = 0;
5309
5310 goto try_next_page;
5311 }
5312 /*
5313 * someone else is playing with the
5314 * page. We will have to wait.
5315 */
5316 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5317
5318 continue;
5319 }
5320 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5321
5322 vm_page_lockspin_queues();
5323
5324 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5325 /*
5326 * we've buddied up a page for a clustered pageout
5327 * that has already been moved to the pageout
5328 * queue by pageout_scan... we need to remove
5329 * it from the queue and drop the laundry count
5330 * on that queue
5331 */
5332 vm_pageout_throttle_up(dst_page);
5333 }
5334 vm_page_unlock_queues();
5335 }
5336 #if MACH_CLUSTER_STATS
5337 /*
5338 * pageout statistics gathering. count
5339 * all the pages we will page out that
5340 * were not counted in the initial
5341 * vm_pageout_scan work
5342 */
5343 if (dst_page->pageout)
5344 encountered_lrp = TRUE;
5345 if ((dst_page->dirty || (object->internal && dst_page->precious))) {
5346 if (encountered_lrp)
5347 CLUSTER_STAT(pages_at_higher_offsets++;)
5348 else
5349 CLUSTER_STAT(pages_at_lower_offsets++;)
5350 }
5351 #endif
5352 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5353 dirty = hw_dirty ? TRUE : dst_page->dirty;
5354
5355 if (phys_page > upl->highest_page)
5356 upl->highest_page = phys_page;
5357
5358 assert (!pmap_is_noencrypt(phys_page));
5359
5360 if (cntrl_flags & UPL_SET_LITE) {
5361 unsigned int pg_num;
5362
5363 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5364 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5365 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
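/*
 * lite_list is a bitmap with one bit per page of the UPL:
 * pg_num >> 5 selects the 32-bit word and pg_num & 31 the bit
 * within it.  For example, page 37 of the UPL lands in word 1,
 * bit 5 (37 >> 5 == 1, 37 & 31 == 5).
 */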
5366
5367 if (hw_dirty)
5368 pmap_clear_modify(phys_page);
5369
5370 /*
5371 * Mark original page as cleaning
5372 * in place.
5373 */
5374 dst_page->cleaning = TRUE;
5375 dst_page->precious = FALSE;
5376 } else {
5377 /*
5378 * use pageclean setup; it is more
5379 * convenient even for the pageout
5380 * cases here
5381 */
5382 vm_object_lock(upl->map_object);
5383 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5384 vm_object_unlock(upl->map_object);
5385
5386 alias_page->absent = FALSE;
5387 alias_page = NULL;
5388 }
5389 if (dirty) {
5390 SET_PAGE_DIRTY(dst_page, FALSE);
5391 } else {
5392 dst_page->dirty = FALSE;
5393 }
5394
5395 if (!dirty)
5396 dst_page->precious = TRUE;
5397
5398 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5399 if ( !VM_PAGE_WIRED(dst_page))
5400 dst_page->free_when_done = TRUE;
5401 }
5402 } else {
5403 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5404 /*
5405 * Honor copy-on-write obligations
5406 *
5407 * The copy object has changed since we
5408 * last synchronized for copy-on-write.
5409 * Another copy object might have been
5410 * inserted while we released the object's
5411 * lock. Since someone could have seen the
5412 * original contents of the remaining pages
5413 * through that new object, we have to
5414 * synchronize with it again for the remaining
5415 * pages only. The previous pages are "busy"
5416 * so they can not be seen through the new
5417 * mapping. The new mapping will see our
5418 * upcoming changes for those previous pages,
5419 * but that's OK since they couldn't see what
5420 * was there before. It's just a race anyway
5421 * and there's no guarantee of consistency or
5422 * atomicity. We just don't want new mappings
5423 * to see both the *before* and *after* pages.
5424 */
5425 if (object->copy != VM_OBJECT_NULL) {
5426 vm_object_update(
5427 object,
5428 dst_offset,/* current offset */
5429 xfer_size, /* remaining size */
5430 NULL,
5431 NULL,
5432 FALSE, /* should_return */
5433 MEMORY_OBJECT_COPY_SYNC,
5434 VM_PROT_NO_CHANGE);
5435
5436 #if DEVELOPMENT || DEBUG
5437 upl_cow_again++;
5438 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5439 #endif
5440 }
5441 /*
5442 * remember the copy object we synced with
5443 */
5444 last_copy_object = object->copy;
5445 }
5446 dst_page = vm_page_lookup(object, dst_offset);
5447
5448 if (dst_page != VM_PAGE_NULL) {
5449
5450 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5451 /*
5452 * skip over pages already present in the cache
5453 */
5454 if (user_page_list)
5455 user_page_list[entry].phys_addr = 0;
5456
5457 goto try_next_page;
5458 }
5459 if (dst_page->fictitious) {
5460 panic("need corner case for fictitious page");
5461 }
5462
5463 if (dst_page->busy || dst_page->cleaning) {
5464 /*
5465 * someone else is playing with the
5466 * page. We will have to wait.
5467 */
5468 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5469
5470 continue;
5471 }
5472 if (dst_page->laundry)
5473 vm_pageout_steal_laundry(dst_page, FALSE);
5474 } else {
5475 if (object->private) {
5476 /*
5477 * This is a nasty wrinkle for users
5478 * of upl who encounter device or
5479 * private memory; however, it is
5480 * unavoidable, since only a fault can
5481 * resolve the actual backing
5482 * physical page by asking the
5483 * backing device.
5484 */
5485 if (user_page_list)
5486 user_page_list[entry].phys_addr = 0;
5487
5488 goto try_next_page;
5489 }
5490 if (object->scan_collisions) {
5491 /*
5492 * the pageout_scan thread is trying to steal
5493 * pages from this object, but has run into our
5494 * lock... grab 2 pages from the head of the object...
5495 * the first is freed on behalf of pageout_scan, the
5496 * 2nd is for our own use... we use vm_object_page_grab
5497 * in both cases to avoid taking pages from the free
5498 * list since we are under memory pressure and our
5499 * lock on this object is getting in the way of
5500 * relieving it
5501 */
5502 dst_page = vm_object_page_grab(object);
5503
5504 if (dst_page != VM_PAGE_NULL)
5505 vm_page_release(dst_page,
5506 FALSE);
5507
5508 dst_page = vm_object_page_grab(object);
5509 }
5510 if (dst_page == VM_PAGE_NULL) {
5511 /*
5512 * need to allocate a page
5513 */
5514 dst_page = vm_page_grab_options(grab_options);
5515 }
5516 if (dst_page == VM_PAGE_NULL) {
5517 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5518 /*
5519 * we don't want to stall waiting for pages to come onto the free list
5520 * while we're already holding absent pages in this UPL...
5521 * the caller will deal with the empty slots
5522 */
5523 if (user_page_list)
5524 user_page_list[entry].phys_addr = 0;
5525
5526 goto try_next_page;
5527 }
5528 /*
5529 * no pages available... wait
5530 * then try again for the same
5531 * offset...
5532 */
5533 vm_object_unlock(object);
5534
5535 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5536
5537 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5538
5539 VM_PAGE_WAIT();
5540 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5541
5542 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5543
5544 vm_object_lock(object);
5545
5546 continue;
5547 }
5548 vm_page_insert(dst_page, object, dst_offset);
5549
5550 dst_page->absent = TRUE;
5551 dst_page->busy = FALSE;
5552
5553 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5554 /*
5555 * if UPL_RET_ONLY_ABSENT was specified,
5556 * then we're definitely setting up a
5557 * UPL for a clustered read/pagein
5558 * operation... mark the pages as clustered
5559 * so upl_commit_range can put them on the
5560 * speculative list
5561 */
5562 dst_page->clustered = TRUE;
5563
5564 if ( !(cntrl_flags & UPL_FILE_IO))
5565 VM_STAT_INCR(pageins);
5566 }
5567 }
5568 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5569
5570 dst_page->overwriting = TRUE;
5571
5572 if (dst_page->pmapped) {
5573 if ( !(cntrl_flags & UPL_FILE_IO))
5574 /*
5575 * eliminate all mappings from the
5576 * original object and its progeny
5577 */
5578 refmod_state = pmap_disconnect(phys_page);
5579 else
5580 refmod_state = pmap_get_refmod(phys_page);
5581 } else
5582 refmod_state = 0;
5583
5584 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5585 dirty = hw_dirty ? TRUE : dst_page->dirty;
5586
5587 if (cntrl_flags & UPL_SET_LITE) {
5588 unsigned int pg_num;
5589
5590 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5591 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5592 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5593
5594 if (hw_dirty)
5595 pmap_clear_modify(phys_page);
5596
5597 /*
5598 * Mark original page as cleaning
5599 * in place.
5600 */
5601 dst_page->cleaning = TRUE;
5602 dst_page->precious = FALSE;
5603 } else {
5604 /*
5605 * use pageclean setup; it is more
5606 * convenient even for the pageout
5607 * cases here
5608 */
5609 vm_object_lock(upl->map_object);
5610 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5611 vm_object_unlock(upl->map_object);
5612
5613 alias_page->absent = FALSE;
5614 alias_page = NULL;
5615 }
5616
5617 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5618 upl->flags &= ~UPL_CLEAR_DIRTY;
5619 upl->flags |= UPL_SET_DIRTY;
5620 dirty = TRUE;
5622 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5623 /*
5624 * clean in place for read implies
5625 * that a write will be done on all
5626 * the pages that are dirty before
5627 * a upl commit is done. The caller
5628 * is obligated to preserve the
5629 * contents of all pages marked dirty
5630 */
5631 upl->flags |= UPL_CLEAR_DIRTY;
5632 }
5633 dst_page->dirty = dirty;
5634
5635 if (!dirty)
5636 dst_page->precious = TRUE;
5637
5638 if ( !VM_PAGE_WIRED(dst_page)) {
5639 /*
5640 * deny access to the target page while
5641 * it is being worked on
5642 */
5643 dst_page->busy = TRUE;
5644 } else
5645 dwp->dw_mask |= DW_vm_page_wire;
5646
5647 /*
5648 * We might be about to satisfy a fault which has been
5649 * requested. So no need for the "restart" bit.
5650 */
5651 dst_page->restart = FALSE;
5652 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5653 /*
5654 * expect the page to be used
5655 */
5656 dwp->dw_mask |= DW_set_reference;
5657 }
5658 if (cntrl_flags & UPL_PRECIOUS) {
5659 if (object->internal) {
5660 SET_PAGE_DIRTY(dst_page, FALSE);
5661 dst_page->precious = FALSE;
5662 } else {
5663 dst_page->precious = TRUE;
5664 }
5665 } else {
5666 dst_page->precious = FALSE;
5667 }
5668 }
5669 if (dst_page->busy)
5670 upl->flags |= UPL_HAS_BUSY;
5671
5672 if (phys_page > upl->highest_page)
5673 upl->highest_page = phys_page;
5674 assert (!pmap_is_noencrypt(phys_page));
5675 if (user_page_list) {
5676 user_page_list[entry].phys_addr = phys_page;
5677 user_page_list[entry].free_when_done = dst_page->free_when_done;
5678 user_page_list[entry].absent = dst_page->absent;
5679 user_page_list[entry].dirty = dst_page->dirty;
5680 user_page_list[entry].precious = dst_page->precious;
5681 user_page_list[entry].device = FALSE;
5682 user_page_list[entry].needed = FALSE;
5683 if (dst_page->clustered == TRUE)
5684 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5685 else
5686 user_page_list[entry].speculative = FALSE;
5687 user_page_list[entry].cs_validated = dst_page->cs_validated;
5688 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5689 user_page_list[entry].cs_nx = dst_page->cs_nx;
5690 user_page_list[entry].mark = FALSE;
5691 }
5692 /*
5693 * if UPL_RET_ONLY_ABSENT is set, then
5694 * we are working with a fresh page and we've
5695 * just set the clustered flag on it to
5696 * indicate that it was dragged in as part of a
5697 * speculative cluster... so leave it alone
5698 */
5699 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5700 /*
5701 * someone is explicitly grabbing this page...
5702 * update clustered and speculative state
5703 *
5704 */
5705 if (dst_page->clustered)
5706 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5707 }
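/*
 * try_next_page: any queue manipulation decided on for this page is
 * recorded in dwp->dw_mask and batched through the delayed-work array,
 * so vm_page_do_delayed_work() can apply up to dw_limit entries under a
 * single page-queues lock hold instead of locking once per page.
 */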
5708 try_next_page:
5709 if (dwp->dw_mask) {
5710 if (dwp->dw_mask & DW_vm_page_activate)
5711 VM_STAT_INCR(reactivations);
5712
5713 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5714
5715 if (dw_count >= dw_limit) {
5716 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5717
5718 dwp = &dw_array[0];
5719 dw_count = 0;
5720 }
5721 }
5722 entry++;
5723 dst_offset += PAGE_SIZE_64;
5724 xfer_size -= PAGE_SIZE;
5725 }
5726 if (dw_count)
5727 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5728
5729 if (alias_page != NULL) {
5730 VM_PAGE_FREE(alias_page);
5731 }
5732
5733 if (page_list_count != NULL) {
5734 if (upl->flags & UPL_INTERNAL)
5735 *page_list_count = 0;
5736 else if (*page_list_count > entry)
5737 *page_list_count = entry;
5738 }
5739 #if UPL_DEBUG
5740 upl->upl_state = 1;
5741 #endif
5742 vm_object_unlock(object);
5743
5744 return KERN_SUCCESS;
5745 }
5746
5747 /*
5748 * Routine: vm_object_super_upl_request
5749 * Purpose:
5750 * Cause the population of a portion of a vm_object
5751 * in much the same way as memory_object_upl_request.
5752 * Depending on the nature of the request, the pages
5753 * returned may contain valid data or be uninitialized.
5754 * However, the region may be expanded up to the super
5755 * cluster size provided.
5756 */
5757
5758 __private_extern__ kern_return_t
5759 vm_object_super_upl_request(
5760 vm_object_t object,
5761 vm_object_offset_t offset,
5762 upl_size_t size,
5763 upl_size_t super_cluster,
5764 upl_t *upl,
5765 upl_page_info_t *user_page_list,
5766 unsigned int *page_list_count,
5767 upl_control_flags_t cntrl_flags,
5768 vm_tag_t tag)
5769 {
5770 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5771 return KERN_FAILURE;
5772
5773 assert(object->paging_in_progress);
5774 offset = offset - object->paging_offset;
5775
5776 if (super_cluster > size) {
5777
5778 vm_object_offset_t base_offset;
5779 upl_size_t super_size;
5780 vm_object_size_t super_size_64;
5781
5782 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5783 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5784 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5785 super_size = (upl_size_t) super_size_64;
5786 assert(super_size == super_size_64);
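/*
 * Illustrative example (assumes 4K pages and a 64K super_cluster, which
 * must be a power of two): a request for offset 0x11000, size 0x2000
 * gives base_offset = 0x10000; since 0x13000 does not extend past
 * base_offset + super_cluster (0x20000), the cluster is not doubled and
 * super_size stays 0x10000, so the UPL is built for [0x10000, 0x20000)
 * rather than just the two requested pages (subject to the object-size
 * clamp above).
 */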
5787
5788 if (offset > (base_offset + super_size)) {
5789 panic("vm_object_super_upl_request: Missed target pageout"
5790 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5791 offset, base_offset, super_size, super_cluster,
5792 size, object->paging_offset);
5793 }
5794 /*
5795 * apparently there is a case where the vm requests a
5796 * page to be written out whose offset is beyond the
5797 * object size
5798 */
5799 if ((offset + size) > (base_offset + super_size)) {
5800 super_size_64 = (offset + size) - base_offset;
5801 super_size = (upl_size_t) super_size_64;
5802 assert(super_size == super_size_64);
5803 }
5804
5805 offset = base_offset;
5806 size = super_size;
5807 }
5808 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5809 }
5810
5811 #if CONFIG_EMBEDDED
5812 int cs_executable_create_upl = 0;
5813 extern int proc_selfpid(void);
5814 extern char *proc_name_address(void *p);
5815 #endif /* CONFIG_EMBEDDED */
5816
5817 kern_return_t
5818 vm_map_create_upl(
5819 vm_map_t map,
5820 vm_map_address_t offset,
5821 upl_size_t *upl_size,
5822 upl_t *upl,
5823 upl_page_info_array_t page_list,
5824 unsigned int *count,
5825 upl_control_flags_t *flags,
5826 vm_tag_t tag)
5827 {
5828 vm_map_entry_t entry;
5829 upl_control_flags_t caller_flags;
5830 int force_data_sync;
5831 int sync_cow_data;
5832 vm_object_t local_object;
5833 vm_map_offset_t local_offset;
5834 vm_map_offset_t local_start;
5835 kern_return_t ret;
5836
5837 assert(page_aligned(offset));
5838
5839 caller_flags = *flags;
5840
5841 if (caller_flags & ~UPL_VALID_FLAGS) {
5842 /*
5843 * For forward compatibility's sake,
5844 * reject any unknown flag.
5845 */
5846 return KERN_INVALID_VALUE;
5847 }
5848 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5849 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5850
5851 if (upl == NULL)
5852 return KERN_INVALID_ARGUMENT;
5853
5854 REDISCOVER_ENTRY:
5855 vm_map_lock_read(map);
5856
5857 if (!vm_map_lookup_entry(map, offset, &entry)) {
5858 vm_map_unlock_read(map);
5859 return KERN_FAILURE;
5860 }
5861
5862 if ((entry->vme_end - offset) < *upl_size) {
5863 *upl_size = (upl_size_t) (entry->vme_end - offset);
5864 assert(*upl_size == entry->vme_end - offset);
5865 }
5866
5867 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5868 *flags = 0;
5869
5870 if (!entry->is_sub_map &&
5871 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5872 if (VME_OBJECT(entry)->private)
5873 *flags = UPL_DEV_MEMORY;
5874
5875 if (VME_OBJECT(entry)->phys_contiguous)
5876 *flags |= UPL_PHYS_CONTIG;
5877 }
5878 vm_map_unlock_read(map);
5879 return KERN_SUCCESS;
5880 }
5881
5882 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5883 !VME_OBJECT(entry)->phys_contiguous) {
5884 if (*upl_size > MAX_UPL_SIZE_BYTES)
5885 *upl_size = MAX_UPL_SIZE_BYTES;
5886 }
5887
5888 /*
5889 * Create an object if necessary.
5890 */
5891 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5892
5893 if (vm_map_lock_read_to_write(map))
5894 goto REDISCOVER_ENTRY;
5895
5896 VME_OBJECT_SET(entry,
5897 vm_object_allocate((vm_size_t)
5898 (entry->vme_end -
5899 entry->vme_start)));
5900 VME_OFFSET_SET(entry, 0);
5901 assert(entry->use_pmap);
5902
5903 vm_map_lock_write_to_read(map);
5904 }
5905
5906 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5907 !(entry->protection & VM_PROT_WRITE)) {
5908 vm_map_unlock_read(map);
5909 return KERN_PROTECTION_FAILURE;
5910 }
5911
5912 #if CONFIG_EMBEDDED
5913 if (map->pmap != kernel_pmap &&
5914 (caller_flags & UPL_COPYOUT_FROM) &&
5915 (entry->protection & VM_PROT_EXECUTE) &&
5916 !(entry->protection & VM_PROT_WRITE)) {
5917 vm_offset_t kaddr;
5918 vm_size_t ksize;
5919
5920 /*
5921 * We're about to create a read-only UPL backed by
5922 * memory from an executable mapping.
5923 * Wiring the pages would result in the pages being copied
5924 * (due to the "MAP_PRIVATE" mapping) and no longer
5925 * code-signed, so no longer eligible for execution.
5926 * Instead, let's copy the data into a kernel buffer and
5927 * create the UPL from this kernel buffer.
5928 * The kernel buffer is then freed, leaving the UPL holding
5929 * the last reference on the VM object, so the memory will
5930 * be released when the UPL is committed.
5931 */
5932
5933 vm_map_unlock_read(map);
5934 /* allocate kernel buffer */
5935 ksize = round_page(*upl_size);
5936 kaddr = 0;
5937 ret = kmem_alloc_pageable(kernel_map,
5938 &kaddr,
5939 ksize,
5940 tag);
5941 if (ret == KERN_SUCCESS) {
5942 /* copyin the user data */
5943 assert(page_aligned(offset));
5944 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5945 }
5946 if (ret == KERN_SUCCESS) {
5947 if (ksize > *upl_size) {
5948 /* zero out the extra space in kernel buffer */
5949 memset((void *)(kaddr + *upl_size),
5950 0,
5951 ksize - *upl_size);
5952 }
5953 /* create the UPL from the kernel buffer */
5954 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5955 upl, page_list, count, flags, tag);
5956 }
5957 if (kaddr != 0) {
5958 /* free the kernel buffer */
5959 kmem_free(kernel_map, kaddr, ksize);
5960 kaddr = 0;
5961 ksize = 0;
5962 }
5963 #if DEVELOPMENT || DEBUG
5964 DTRACE_VM4(create_upl_from_executable,
5965 vm_map_t, map,
5966 vm_map_address_t, offset,
5967 upl_size_t, *upl_size,
5968 kern_return_t, ret);
5969 #endif /* DEVELOPMENT || DEBUG */
5970 return ret;
5971 }
5972 #endif /* CONFIG_EMBEDDED */
5973
5974 local_object = VME_OBJECT(entry);
5975 assert(local_object != VM_OBJECT_NULL);
5976
5977 if (!entry->is_sub_map &&
5978 !entry->needs_copy &&
5979 *upl_size != 0 &&
5980 local_object->vo_size > *upl_size && /* partial UPL */
5981 entry->wired_count == 0 && /* No COW for entries that are wired */
5982 (map->pmap != kernel_pmap) && /* alias checks */
5983 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5984 ||
5985 (/* case 2 */
5986 local_object->internal &&
5987 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5988 local_object->ref_count > 1))) {
5989 vm_prot_t prot;
5990
5991 /*
5992 * Case 1:
5993 * Set up the targeted range for copy-on-write to avoid
5994 * applying true_share/copy_delay to the entire object.
5995 *
5996 * Case 2:
5997 * This map entry covers only part of an internal
5998 * object. There could be other map entries covering
5999 * other areas of this object and some of these map
6000 * entries could be marked as "needs_copy", which
6001 * assumes that the object is COPY_SYMMETRIC.
6002 * To avoid marking this object as COPY_DELAY and
6003 * "true_share", let's shadow it and mark the new
6004 * (smaller) object as "true_share" and COPY_DELAY.
6005 */
6006
6007 if (vm_map_lock_read_to_write(map)) {
6008 goto REDISCOVER_ENTRY;
6009 }
6010 vm_map_lock_assert_exclusive(map);
6011 assert(VME_OBJECT(entry) == local_object);
6012
6013 vm_map_clip_start(map,
6014 entry,
6015 vm_map_trunc_page(offset,
6016 VM_MAP_PAGE_MASK(map)));
6017 vm_map_clip_end(map,
6018 entry,
6019 vm_map_round_page(offset + *upl_size,
6020 VM_MAP_PAGE_MASK(map)));
6021 if ((entry->vme_end - offset) < *upl_size) {
6022 *upl_size = (upl_size_t) (entry->vme_end - offset);
6023 assert(*upl_size == entry->vme_end - offset);
6024 }
6025
6026 prot = entry->protection & ~VM_PROT_WRITE;
6027 if (override_nx(map, VME_ALIAS(entry)) && prot)
6028 prot |= VM_PROT_EXECUTE;
6029 vm_object_pmap_protect(local_object,
6030 VME_OFFSET(entry),
6031 entry->vme_end - entry->vme_start,
6032 ((entry->is_shared ||
6033 map->mapped_in_other_pmaps)
6034 ? PMAP_NULL
6035 : map->pmap),
6036 entry->vme_start,
6037 prot);
6038
6039 assert(entry->wired_count == 0);
6040
6041 /*
6042 * Lock the VM object and re-check its status: if it's mapped
6043 * in another address space, we could still be racing with
6044 * another thread holding that other VM map exclusively.
6045 */
6046 vm_object_lock(local_object);
6047 if (local_object->true_share) {
6048 /* object is already in proper state: no COW needed */
6049 assert(local_object->copy_strategy !=
6050 MEMORY_OBJECT_COPY_SYMMETRIC);
6051 } else {
6052 /* not true_share: ask for copy-on-write below */
6053 assert(local_object->copy_strategy ==
6054 MEMORY_OBJECT_COPY_SYMMETRIC);
6055 entry->needs_copy = TRUE;
6056 }
6057 vm_object_unlock(local_object);
6058
6059 vm_map_lock_write_to_read(map);
6060 }
6061
6062 if (entry->needs_copy) {
6063 /*
6064 * Honor copy-on-write for COPY_SYMMETRIC
6065 * strategy.
6066 */
6067 vm_map_t local_map;
6068 vm_object_t object;
6069 vm_object_offset_t new_offset;
6070 vm_prot_t prot;
6071 boolean_t wired;
6072 vm_map_version_t version;
6073 vm_map_t real_map;
6074 vm_prot_t fault_type;
6075
6076 local_map = map;
6077
6078 if (caller_flags & UPL_COPYOUT_FROM) {
6079 fault_type = VM_PROT_READ | VM_PROT_COPY;
6080 vm_counters.create_upl_extra_cow++;
6081 vm_counters.create_upl_extra_cow_pages +=
6082 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6083 } else {
6084 fault_type = VM_PROT_WRITE;
6085 }
6086 if (vm_map_lookup_locked(&local_map,
6087 offset, fault_type,
6088 OBJECT_LOCK_EXCLUSIVE,
6089 &version, &object,
6090 &new_offset, &prot, &wired,
6091 NULL,
6092 &real_map) != KERN_SUCCESS) {
6093 if (fault_type == VM_PROT_WRITE) {
6094 vm_counters.create_upl_lookup_failure_write++;
6095 } else {
6096 vm_counters.create_upl_lookup_failure_copy++;
6097 }
6098 vm_map_unlock_read(local_map);
6099 return KERN_FAILURE;
6100 }
6101 if (real_map != map)
6102 vm_map_unlock(real_map);
6103 vm_map_unlock_read(local_map);
6104
6105 vm_object_unlock(object);
6106
6107 goto REDISCOVER_ENTRY;
6108 }
6109
6110 if (entry->is_sub_map) {
6111 vm_map_t submap;
6112
6113 submap = VME_SUBMAP(entry);
6114 local_start = entry->vme_start;
6115 local_offset = VME_OFFSET(entry);
6116
6117 vm_map_reference(submap);
6118 vm_map_unlock_read(map);
6119
6120 ret = vm_map_create_upl(submap,
6121 local_offset + (offset - local_start),
6122 upl_size, upl, page_list, count, flags, tag);
6123 vm_map_deallocate(submap);
6124
6125 return ret;
6126 }
6127
6128 if (sync_cow_data &&
6129 (VME_OBJECT(entry)->shadow ||
6130 VME_OBJECT(entry)->copy)) {
6131 local_object = VME_OBJECT(entry);
6132 local_start = entry->vme_start;
6133 local_offset = VME_OFFSET(entry);
6134
6135 vm_object_reference(local_object);
6136 vm_map_unlock_read(map);
6137
6138 if (local_object->shadow && local_object->copy) {
6139 vm_object_lock_request(local_object->shadow,
6140 ((vm_object_offset_t)
6141 ((offset - local_start) +
6142 local_offset) +
6143 local_object->vo_shadow_offset),
6144 *upl_size, FALSE,
6145 MEMORY_OBJECT_DATA_SYNC,
6146 VM_PROT_NO_CHANGE);
6147 }
6148 sync_cow_data = FALSE;
6149 vm_object_deallocate(local_object);
6150
6151 goto REDISCOVER_ENTRY;
6152 }
6153 if (force_data_sync) {
6154 local_object = VME_OBJECT(entry);
6155 local_start = entry->vme_start;
6156 local_offset = VME_OFFSET(entry);
6157
6158 vm_object_reference(local_object);
6159 vm_map_unlock_read(map);
6160
6161 vm_object_lock_request(local_object,
6162 ((vm_object_offset_t)
6163 ((offset - local_start) +
6164 local_offset)),
6165 (vm_object_size_t)*upl_size,
6166 FALSE,
6167 MEMORY_OBJECT_DATA_SYNC,
6168 VM_PROT_NO_CHANGE);
6169
6170 force_data_sync = FALSE;
6171 vm_object_deallocate(local_object);
6172
6173 goto REDISCOVER_ENTRY;
6174 }
6175 if (VME_OBJECT(entry)->private)
6176 *flags = UPL_DEV_MEMORY;
6177 else
6178 *flags = 0;
6179
6180 if (VME_OBJECT(entry)->phys_contiguous)
6181 *flags |= UPL_PHYS_CONTIG;
6182
6183 local_object = VME_OBJECT(entry);
6184 local_offset = VME_OFFSET(entry);
6185 local_start = entry->vme_start;
6186
6187 #if CONFIG_EMBEDDED
6188 /*
6189 * Wiring will copy the pages to the shadow object.
6190 * The shadow object will not be code-signed so
6191 * attempting to execute code from these copied pages
6192 * would trigger a code-signing violation.
6193 */
6194 if (entry->protection & VM_PROT_EXECUTE) {
6195 #if MACH_ASSERT
6196 printf("pid %d[%s] create_upl out of executable range from "
6197 "0x%llx to 0x%llx: side effects may include "
6198 "code-signing violations later on\n",
6199 proc_selfpid(),
6200 (current_task()->bsd_info
6201 ? proc_name_address(current_task()->bsd_info)
6202 : "?"),
6203 (uint64_t) entry->vme_start,
6204 (uint64_t) entry->vme_end);
6205 #endif /* MACH_ASSERT */
6206 DTRACE_VM2(cs_executable_create_upl,
6207 uint64_t, (uint64_t)entry->vme_start,
6208 uint64_t, (uint64_t)entry->vme_end);
6209 cs_executable_create_upl++;
6210 }
6211 #endif /* CONFIG_EMBEDDED */
6212
6213 vm_object_lock(local_object);
6214
6215 /*
6216 * Ensure that this object is "true_share" and "copy_delay" now,
6217 * while we're still holding the VM map lock. After we unlock the map,
6218 * anything could happen to that mapping, including some copy-on-write
6219 * activity. We need to make sure that the IOPL will point at the
6220 * same memory as the mapping.
6221 */
6222 if (local_object->true_share) {
6223 assert(local_object->copy_strategy !=
6224 MEMORY_OBJECT_COPY_SYMMETRIC);
6225 } else if (local_object != kernel_object &&
6226 local_object != compressor_object &&
6227 !local_object->phys_contiguous) {
6228 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6229 if (!local_object->true_share &&
6230 vm_object_tracking_inited) {
6231 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6232 int num = 0;
6233 num = OSBacktrace(bt,
6234 VM_OBJECT_TRACKING_BTDEPTH);
6235 btlog_add_entry(vm_object_tracking_btlog,
6236 local_object,
6237 VM_OBJECT_TRACKING_OP_TRUESHARE,
6238 bt,
6239 num);
6240 }
6241 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6242 local_object->true_share = TRUE;
6243 if (local_object->copy_strategy ==
6244 MEMORY_OBJECT_COPY_SYMMETRIC) {
6245 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6246 }
6247 }
6248
6249 vm_object_reference_locked(local_object);
6250 vm_object_unlock(local_object);
6251
6252 vm_map_unlock_read(map);
6253
6254 ret = vm_object_iopl_request(local_object,
6255 ((vm_object_offset_t)
6256 ((offset - local_start) + local_offset)),
6257 *upl_size,
6258 upl,
6259 page_list,
6260 count,
6261 caller_flags,
6262 tag);
6263 vm_object_deallocate(local_object);
6264
6265 return ret;
6266 }
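/*
 * Sketch of the typical life cycle of a UPL created through
 * vm_map_create_upl (illustrative only; the exact flags, tag and error
 * handling depend on the caller):
 *
 *	kr = vm_map_create_upl(map, offset, &upl_size, &upl, page_list,
 *			       &count, &flags, tag);
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);	   optional mapping
 *	   ... operate on the pages through kaddr ...
 *	kr = vm_map_remove_upl(kernel_map, upl);
 *	kr = upl_commit_range(upl, 0, upl_size, 0, page_list, count, &empty);
 *	if (empty)
 *		upl_deallocate(upl);
 */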
6267
6268 /*
6269 * Internal routine to enter a UPL into a VM map.
6270 *
6271 * JMM - This should just be doable through the standard
6272 * vm_map_enter() API.
6273 */
6274 kern_return_t
6275 vm_map_enter_upl(
6276 vm_map_t map,
6277 upl_t upl,
6278 vm_map_offset_t *dst_addr)
6279 {
6280 vm_map_size_t size;
6281 vm_object_offset_t offset;
6282 vm_map_offset_t addr;
6283 vm_page_t m;
6284 kern_return_t kr;
6285 int isVectorUPL = 0, curr_upl=0;
6286 upl_t vector_upl = NULL;
6287 vm_offset_t vector_upl_dst_addr = 0;
6288 vm_map_t vector_upl_submap = NULL;
6289 upl_offset_t subupl_offset = 0;
6290 upl_size_t subupl_size = 0;
6291
6292 if (upl == UPL_NULL)
6293 return KERN_INVALID_ARGUMENT;
6294
6295 if((isVectorUPL = vector_upl_is_valid(upl))) {
6296 int mapped=0,valid_upls=0;
6297 vector_upl = upl;
6298
6299 upl_lock(vector_upl);
6300 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6301 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6302 if(upl == NULL)
6303 continue;
6304 valid_upls++;
6305 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6306 mapped++;
6307 }
6308
6309 if(mapped) {
6310 if(mapped != valid_upls)
6311 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6312 else {
6313 upl_unlock(vector_upl);
6314 return KERN_FAILURE;
6315 }
6316 }
6317
6318 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6319 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6320 &vector_upl_submap);
6321 if( kr != KERN_SUCCESS )
6322 panic("Vector UPL submap allocation failed\n");
6323 map = vector_upl_submap;
6324 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6325 curr_upl=0;
6326 }
6327 else
6328 upl_lock(upl);
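/*
 * A vector UPL aggregates a number of sub-UPLs; each sub-UPL is entered
 * at its recorded offset within the single submap allocated above, so
 * the caller ends up with one contiguous mapping covering the whole
 * vector.  The label below is revisited once per valid sub-UPL.
 */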
6329
6330 process_upl_to_enter:
6331 if(isVectorUPL){
6332 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6333 *dst_addr = vector_upl_dst_addr;
6334 upl_unlock(vector_upl);
6335 return KERN_SUCCESS;
6336 }
6337 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6338 if(upl == NULL)
6339 goto process_upl_to_enter;
6340
6341 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6342 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6343 } else {
6344 /*
6345 * check to see if already mapped
6346 */
6347 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6348 upl_unlock(upl);
6349 return KERN_FAILURE;
6350 }
6351 }
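/*
 * If the UPL still has busy pages, or is not a device-memory / IO-wire /
 * physically-contiguous UPL, build a shadow object for the mapping:
 * each page named in the lite list gets a private alias page that
 * shares the underlying physical page, is wired, and is inserted into
 * the new map_object.  The vm_map_enter() below then maps this shadow,
 * leaving the original pages' state untouched while the UPL is mapped.
 */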
6352 if ((!(upl->flags & UPL_SHADOWED)) &&
6353 ((upl->flags & UPL_HAS_BUSY) ||
6354 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6355
6356 vm_object_t object;
6357 vm_page_t alias_page;
6358 vm_object_offset_t new_offset;
6359 unsigned int pg_num;
6360 wpl_array_t lite_list;
6361
6362 if (upl->flags & UPL_INTERNAL) {
6363 lite_list = (wpl_array_t)
6364 ((((uintptr_t)upl) + sizeof(struct upl))
6365 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6366 } else {
6367 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6368 }
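/*
 * Inline layout of an internal UPL (the external case keeps only the
 * lite list behind the header):
 *
 *	struct upl | upl_page_info_t[upl->size / PAGE_SIZE] | lite list
 */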
6369 object = upl->map_object;
6370 upl->map_object = vm_object_allocate(upl->size);
6371
6372 vm_object_lock(upl->map_object);
6373
6374 upl->map_object->shadow = object;
6375 upl->map_object->pageout = TRUE;
6376 upl->map_object->can_persist = FALSE;
6377 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6378 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6379 upl->map_object->wimg_bits = object->wimg_bits;
6380 offset = upl->map_object->vo_shadow_offset;
6381 new_offset = 0;
6382 size = upl->size;
6383
6384 upl->flags |= UPL_SHADOWED;
6385
6386 while (size) {
6387 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6388 assert(pg_num == new_offset / PAGE_SIZE);
6389
6390 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6391
6392 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6393
6394 vm_object_lock(object);
6395
6396 m = vm_page_lookup(object, offset);
6397 if (m == VM_PAGE_NULL) {
6398 panic("vm_upl_map: page missing\n");
6399 }
6400
6401 /*
6402 * Convert the fictitious page to a private
6403 * shadow of the real page.
6404 */
6405 assert(alias_page->fictitious);
6406 alias_page->fictitious = FALSE;
6407 alias_page->private = TRUE;
6408 alias_page->free_when_done = TRUE;
6409 /*
6410 * since m is a page in the upl it must
6411 * already be wired or BUSY, so it's
6412 * safe to assign the underlying physical
6413 * page to the alias
6414 */
6415 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6416
6417 vm_object_unlock(object);
6418
6419 vm_page_lockspin_queues();
6420 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6421 vm_page_unlock_queues();
6422
6423 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6424
6425 assert(!alias_page->wanted);
6426 alias_page->busy = FALSE;
6427 alias_page->absent = FALSE;
6428 }
6429 size -= PAGE_SIZE;
6430 offset += PAGE_SIZE_64;
6431 new_offset += PAGE_SIZE_64;
6432 }
6433 vm_object_unlock(upl->map_object);
6434 }
6435 if (upl->flags & UPL_SHADOWED)
6436 offset = 0;
6437 else
6438 offset = upl->offset - upl->map_object->paging_offset;
6439
6440 size = upl->size;
6441
6442 vm_object_reference(upl->map_object);
6443
6444 if(!isVectorUPL) {
6445 *dst_addr = 0;
6446 /*
6447 * NEED A UPL_MAP ALIAS
6448 */
6449 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6450 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6451 upl->map_object, offset, FALSE,
6452 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6453
6454 if (kr != KERN_SUCCESS) {
6455 vm_object_deallocate(upl->map_object);
6456 upl_unlock(upl);
6457 return(kr);
6458 }
6459 }
6460 else {
6461 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6462 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6463 upl->map_object, offset, FALSE,
6464 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6465 if(kr)
6466 panic("vm_map_enter failed for a Vector UPL\n");
6467 }
6468 vm_object_lock(upl->map_object);
6469
6470 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6471 m = vm_page_lookup(upl->map_object, offset);
6472
6473 if (m) {
6474 m->pmapped = TRUE;
6475
6476 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6477 * but only in kernel space. If this was on a user map,
6478 * we'd have to set the wpmapped bit. */
6479 /* m->wpmapped = TRUE; */
6480 assert(map->pmap == kernel_pmap);
6481
6482 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6483
6484 assert(kr == KERN_SUCCESS);
6485 #if KASAN
6486 kasan_notify_address(addr, PAGE_SIZE_64);
6487 #endif
6488 }
6489 offset += PAGE_SIZE_64;
6490 }
6491 vm_object_unlock(upl->map_object);
6492
6493 /*
6494 * hold a reference for the mapping
6495 */
6496 upl->ref_count++;
6497 upl->flags |= UPL_PAGE_LIST_MAPPED;
6498 upl->kaddr = (vm_offset_t) *dst_addr;
6499 assert(upl->kaddr == *dst_addr);
6500
6501 if(isVectorUPL)
6502 goto process_upl_to_enter;
6503
6504 upl_unlock(upl);
6505
6506 return KERN_SUCCESS;
6507 }
6508
6509 /*
6510 * Internal routine to remove a UPL mapping from a VM map.
6511 *
6512 * XXX - This should just be doable through a standard
6513 * vm_map_remove() operation. Otherwise, implicit clean-up
6514 * of the target map won't be able to correctly remove
6515 * these (and release the reference on the UPL). Having
6516 * to do this means we can't map these into user-space
6517 * maps yet.
6518 */
6519 kern_return_t
6520 vm_map_remove_upl(
6521 vm_map_t map,
6522 upl_t upl)
6523 {
6524 vm_address_t addr;
6525 upl_size_t size;
6526 int isVectorUPL = 0, curr_upl = 0;
6527 upl_t vector_upl = NULL;
6528
6529 if (upl == UPL_NULL)
6530 return KERN_INVALID_ARGUMENT;
6531
6532 if((isVectorUPL = vector_upl_is_valid(upl))) {
6533 int unmapped=0, valid_upls=0;
6534 vector_upl = upl;
6535 upl_lock(vector_upl);
6536 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6537 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6538 if(upl == NULL)
6539 continue;
6540 valid_upls++;
6541 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6542 unmapped++;
6543 }
6544
6545 if(unmapped) {
6546 if(unmapped != valid_upls)
6547 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6548 else {
6549 upl_unlock(vector_upl);
6550 return KERN_FAILURE;
6551 }
6552 }
6553 curr_upl=0;
6554 }
6555 else
6556 upl_lock(upl);
6557
6558 process_upl_to_remove:
6559 if(isVectorUPL) {
6560 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6561 vm_map_t v_upl_submap;
6562 vm_offset_t v_upl_submap_dst_addr;
6563 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6564
6565 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6566 vm_map_deallocate(v_upl_submap);
6567 upl_unlock(vector_upl);
6568 return KERN_SUCCESS;
6569 }
6570
6571 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6572 if(upl == NULL)
6573 goto process_upl_to_remove;
6574 }
6575
6576 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6577 addr = upl->kaddr;
6578 size = upl->size;
6579
6580 assert(upl->ref_count > 1);
6581 upl->ref_count--; /* removing mapping ref */
6582
6583 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6584 upl->kaddr = (vm_offset_t) 0;
6585
6586 if(!isVectorUPL) {
6587 upl_unlock(upl);
6588
6589 vm_map_remove(
6590 map,
6591 vm_map_trunc_page(addr,
6592 VM_MAP_PAGE_MASK(map)),
6593 vm_map_round_page(addr + size,
6594 VM_MAP_PAGE_MASK(map)),
6595 VM_MAP_NO_FLAGS);
6596
6597 return KERN_SUCCESS;
6598 }
6599 else {
6600 /*
6601 * If it's a Vectored UPL, we'll be removing the entire
6602 * submap anyway, so no need to remove individual UPL
6603 * element mappings from within the submap
6604 */
6605 goto process_upl_to_remove;
6606 }
6607 }
6608 upl_unlock(upl);
6609
6610 return KERN_FAILURE;
6611 }
6612
6613
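/*
 * Routine: upl_commit_range
 * Purpose:
 * Commit a page-aligned range of a UPL: apply the requested final page
 * state (dirty/clean, code-signing state, queue placement), clear the
 * busy state and/or wiring set up when the UPL was constructed, and
 * wake any waiters.  *empty is set when the commit leaves no occupied
 * pages in the UPL and the caller asked to be notified (or the UPL is
 * part of a vector UPL), telling the caller it may deallocate it.
 */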
6614 kern_return_t
6615 upl_commit_range(
6616 upl_t upl,
6617 upl_offset_t offset,
6618 upl_size_t size,
6619 int flags,
6620 upl_page_info_t *page_list,
6621 mach_msg_type_number_t count,
6622 boolean_t *empty)
6623 {
6624 upl_size_t xfer_size, subupl_size = size;
6625 vm_object_t shadow_object;
6626 vm_object_t object;
6627 vm_object_t m_object;
6628 vm_object_offset_t target_offset;
6629 upl_offset_t subupl_offset = offset;
6630 int entry;
6631 wpl_array_t lite_list;
6632 int occupied;
6633 int clear_refmod = 0;
6634 int pgpgout_count = 0;
6635 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6636 struct vm_page_delayed_work *dwp;
6637 int dw_count;
6638 int dw_limit;
6639 int isVectorUPL = 0;
6640 upl_t vector_upl = NULL;
6641 boolean_t should_be_throttled = FALSE;
6642
6643 vm_page_t nxt_page = VM_PAGE_NULL;
6644 int fast_path_possible = 0;
6645 int fast_path_full_commit = 0;
6646 int throttle_page = 0;
6647 int unwired_count = 0;
6648 int local_queue_count = 0;
6649 vm_page_t first_local, last_local;
6650
6651 *empty = FALSE;
6652
6653 if (upl == UPL_NULL)
6654 return KERN_INVALID_ARGUMENT;
6655
6656 if (count == 0)
6657 page_list = NULL;
6658
6659 if((isVectorUPL = vector_upl_is_valid(upl))) {
6660 vector_upl = upl;
6661 upl_lock(vector_upl);
6662 }
6663 else
6664 upl_lock(upl);
6665
6666 process_upl_to_commit:
6667
6668 if(isVectorUPL) {
6669 size = subupl_size;
6670 offset = subupl_offset;
6671 if(size == 0) {
6672 upl_unlock(vector_upl);
6673 return KERN_SUCCESS;
6674 }
6675 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6676 if(upl == NULL) {
6677 upl_unlock(vector_upl);
6678 return KERN_FAILURE;
6679 }
6680 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6681 subupl_size -= size;
6682 subupl_offset += size;
6683 }
6684
6685 #if UPL_DEBUG
6686 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6687 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6688
6689 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6690 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6691
6692 upl->upl_commit_index++;
6693 }
6694 #endif
6695 if (upl->flags & UPL_DEVICE_MEMORY)
6696 xfer_size = 0;
6697 else if ((offset + size) <= upl->size)
6698 xfer_size = size;
6699 else {
6700 if(!isVectorUPL)
6701 upl_unlock(upl);
6702 else {
6703 upl_unlock(vector_upl);
6704 }
6705 return KERN_FAILURE;
6706 }
6707 if (upl->flags & UPL_SET_DIRTY)
6708 flags |= UPL_COMMIT_SET_DIRTY;
6709 if (upl->flags & UPL_CLEAR_DIRTY)
6710 flags |= UPL_COMMIT_CLEAR_DIRTY;
6711
6712 if (upl->flags & UPL_INTERNAL)
6713 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6714 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6715 else
6716 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6717
6718 object = upl->map_object;
6719
6720 if (upl->flags & UPL_SHADOWED) {
6721 vm_object_lock(object);
6722 shadow_object = object->shadow;
6723 } else {
6724 shadow_object = object;
6725 }
6726 entry = offset/PAGE_SIZE;
6727 target_offset = (vm_object_offset_t)offset;
6728
6729 assert(!(target_offset & PAGE_MASK));
6730 assert(!(xfer_size & PAGE_MASK));
6731
6732 if (upl->flags & UPL_KERNEL_OBJECT)
6733 vm_object_lock_shared(shadow_object);
6734 else
6735 vm_object_lock(shadow_object);
6736
6737 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6738
6739 if (upl->flags & UPL_ACCESS_BLOCKED) {
6740 assert(shadow_object->blocked_access);
6741 shadow_object->blocked_access = FALSE;
6742 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6743 }
6744
6745 if (shadow_object->code_signed) {
6746 /*
6747 * CODE SIGNING:
6748 * If the object is code-signed, do not let this UPL tell
6749 * us if the pages are valid or not. Let the pages be
6750 * validated by VM the normal way (when they get mapped or
6751 * copied).
6752 */
6753 flags &= ~UPL_COMMIT_CS_VALIDATED;
6754 }
6755 if (! page_list) {
6756 /*
6757 * No page list to get the code-signing info from !?
6758 */
6759 flags &= ~UPL_COMMIT_CS_VALIDATED;
6760 }
6761 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6762 should_be_throttled = TRUE;
6763
6764 dwp = &dw_array[0];
6765 dw_count = 0;
6766 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6767
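/*
 * Fast-path detection: for a plain (non-vector) IO-wire UPL against an
 * object that is neither volatile nor empty purgeable, and when absent
 * pages are not being freed, committed pages are collected on a local
 * list (first_local/last_local) and spliced into the appropriate global
 * page queue in one operation after the loop, rather than being
 * requeued one at a time through the delayed-work entries.
 */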
6768 if ((upl->flags & UPL_IO_WIRE) &&
6769 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6770 !isVectorUPL &&
6771 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6772 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6773
6774 if (!vm_page_queue_empty(&shadow_object->memq)) {
6775
6776 if (size == shadow_object->vo_size) {
6777 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6778 fast_path_full_commit = 1;
6779 }
6780 fast_path_possible = 1;
6781
6782 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6783 (shadow_object->purgable == VM_PURGABLE_DENY ||
6784 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6785 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6786 throttle_page = 1;
6787 }
6788 }
6789 }
6790 first_local = VM_PAGE_NULL;
6791 last_local = VM_PAGE_NULL;
6792
6793 while (xfer_size) {
6794 vm_page_t t, m;
6795
6796 dwp->dw_mask = 0;
6797 clear_refmod = 0;
6798
6799 m = VM_PAGE_NULL;
6800
6801 if (upl->flags & UPL_LITE) {
6802 unsigned int pg_num;
6803
6804 if (nxt_page != VM_PAGE_NULL) {
6805 m = nxt_page;
6806 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
6807 target_offset = m->offset;
6808 }
6809 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6810 assert(pg_num == target_offset/PAGE_SIZE);
6811
6812 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6813 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6814
6815 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6816 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6817 } else
6818 m = NULL;
6819 }
6820 if (upl->flags & UPL_SHADOWED) {
6821 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6822
6823 t->free_when_done = FALSE;
6824
6825 VM_PAGE_FREE(t);
6826
6827 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6828 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6829 }
6830 }
6831 if (m == VM_PAGE_NULL)
6832 goto commit_next_page;
6833
6834 m_object = VM_PAGE_OBJECT(m);
6835
6836 if (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6837 assert(m->busy);
6838
6839 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6840 goto commit_next_page;
6841 }
6842
6843 if (flags & UPL_COMMIT_CS_VALIDATED) {
6844 /*
6845 * CODE SIGNING:
6846 * Set the code signing bits according to
6847 * what the UPL says they should be.
6848 */
6849 m->cs_validated = page_list[entry].cs_validated;
6850 m->cs_tainted = page_list[entry].cs_tainted;
6851 m->cs_nx = page_list[entry].cs_nx;
6852 }
6853 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6854 m->written_by_kernel = TRUE;
6855
6856 if (upl->flags & UPL_IO_WIRE) {
6857
6858 if (page_list)
6859 page_list[entry].phys_addr = 0;
6860
6861 if (flags & UPL_COMMIT_SET_DIRTY) {
6862 SET_PAGE_DIRTY(m, FALSE);
6863 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6864 m->dirty = FALSE;
6865
6866 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6867 m->cs_validated && !m->cs_tainted) {
6868 /*
6869 * CODE SIGNING:
6870 * This page is no longer dirty
6871 * but could have been modified,
6872 * so it will need to be
6873 * re-validated.
6874 */
6875 if (m->slid) {
6876 panic("upl_commit_range(%p): page %p was slid\n",
6877 upl, m);
6878 }
6879 assert(!m->slid);
6880 m->cs_validated = FALSE;
6881 #if DEVELOPMENT || DEBUG
6882 vm_cs_validated_resets++;
6883 #endif
6884 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6885 }
6886 clear_refmod |= VM_MEM_MODIFIED;
6887 }
6888 if (upl->flags & UPL_ACCESS_BLOCKED) {
6889 /*
6890 * We blocked access to the pages in this UPL.
6891 * Clear the "busy" bit and wake up any waiter
6892 * for this page.
6893 */
6894 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6895 }
6896 if (fast_path_possible) {
6897 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6898 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6899 if (m->absent) {
6900 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
6901 assert(m->wire_count == 0);
6902 assert(m->busy);
6903
6904 m->absent = FALSE;
6905 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6906 } else {
6907 if (m->wire_count == 0)
6908 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6909 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
6910
6911 /*
6912 * XXX FBDP need to update some other
6913 * counters here (purgeable_wired_count)
6914 * (ledgers), ...
6915 */
6916 assert(m->wire_count > 0);
6917 m->wire_count--;
6918
6919 if (m->wire_count == 0) {
6920 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
6921 unwired_count++;
6922 }
6923 }
6924 if (m->wire_count == 0) {
6925 assert(m->pageq.next == 0 && m->pageq.prev == 0);
6926
6927 if (last_local == VM_PAGE_NULL) {
6928 assert(first_local == VM_PAGE_NULL);
6929
6930 last_local = m;
6931 first_local = m;
6932 } else {
6933 assert(first_local != VM_PAGE_NULL);
6934
6935 m->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6936 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6937 first_local = m;
6938 }
6939 local_queue_count++;
6940
6941 if (throttle_page) {
6942 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
6943 } else {
6944 if (flags & UPL_COMMIT_INACTIVATE) {
6945 if (shadow_object->internal)
6946 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6947 else
6948 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6949 } else
6950 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
6951 }
6952 }
6953 } else {
6954 if (flags & UPL_COMMIT_INACTIVATE) {
6955 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6956 clear_refmod |= VM_MEM_REFERENCED;
6957 }
6958 if (m->absent) {
6959 if (flags & UPL_COMMIT_FREE_ABSENT)
6960 dwp->dw_mask |= DW_vm_page_free;
6961 else {
6962 m->absent = FALSE;
6963 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6964
6965 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6966 dwp->dw_mask |= DW_vm_page_activate;
6967 }
6968 } else
6969 dwp->dw_mask |= DW_vm_page_unwire;
6970 }
6971 goto commit_next_page;
6972 }
6973 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6974
6975 if (page_list)
6976 page_list[entry].phys_addr = 0;
6977
6978 /*
6979 * make sure to clear the hardware
6980 * modify or reference bits before
6981 * releasing the BUSY bit on this page;
6982 * otherwise we risk losing a legitimate
6983 * change of state
6984 */
6985 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6986 m->dirty = FALSE;
6987
6988 clear_refmod |= VM_MEM_MODIFIED;
6989 }
6990 if (m->laundry)
6991 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6992
6993 if (VM_PAGE_WIRED(m))
6994 m->free_when_done = FALSE;
6995
6996 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6997 m->cs_validated && !m->cs_tainted) {
6998 /*
6999 * CODE SIGNING:
7000 * This page is no longer dirty
7001 * but could have been modified,
7002 * so it will need to be
7003 * re-validated.
7004 */
7005 if (m->slid) {
7006 panic("upl_commit_range(%p): page %p was slid\n",
7007 upl, m);
7008 }
7009 assert(!m->slid);
7010 m->cs_validated = FALSE;
7011 #if DEVELOPMENT || DEBUG
7012 vm_cs_validated_resets++;
7013 #endif
7014 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7015 }
7016 if (m->overwriting) {
7017 /*
7018 * the (COPY_OUT_FROM == FALSE) request_page_list case
7019 */
7020 if (m->busy) {
7021 #if CONFIG_PHANTOM_CACHE
7022 if (m->absent && !m_object->internal)
7023 dwp->dw_mask |= DW_vm_phantom_cache_update;
7024 #endif
7025 m->absent = FALSE;
7026
7027 dwp->dw_mask |= DW_clear_busy;
7028 } else {
7029 /*
7030 * alternate (COPY_OUT_FROM == FALSE) page_list case
7031 * Occurs when the original page was wired
7032 * at the time of the list request
7033 */
7034 assert(VM_PAGE_WIRED(m));
7035
7036 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
7037 }
7038 m->overwriting = FALSE;
7039 }
7040 m->cleaning = FALSE;
7041
7042 if (m->free_when_done) {
7043 /*
7044 * With the clean queue enabled, UPL_PAGEOUT should
7045 * no longer set the pageout bit. Its pages now go
7046 * to the clean queue.
7047 */
7048 assert(!(flags & UPL_PAGEOUT));
7049 assert(!m_object->internal);
7050
7051 m->free_when_done = FALSE;
7052 #if MACH_CLUSTER_STATS
7053 if (m->wanted) vm_pageout_target_collisions++;
7054 #endif
7055 if ((flags & UPL_COMMIT_SET_DIRTY) ||
7056 (m->pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
7057 /*
7058 * page was re-dirtied after we started
7059 * the pageout... reactivate it since
7060 * we don't know whether the on-disk
7061 * copy matches what is now in memory
7062 */
7063 SET_PAGE_DIRTY(m, FALSE);
7064
7065 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
7066
7067 if (upl->flags & UPL_PAGEOUT) {
7068 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7069 VM_STAT_INCR(reactivations);
7070 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7071 }
7072 } else {
7073 /*
7074 * page has been successfully cleaned
7075 * go ahead and free it for other use
7076 */
7077 if (m_object->internal) {
7078 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7079 } else {
7080 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7081 }
7082 m->dirty = FALSE;
7083 m->busy = TRUE;
7084
7085 dwp->dw_mask |= DW_vm_page_free;
7086 }
7087 goto commit_next_page;
7088 }
7089 #if MACH_CLUSTER_STATS
7090 if (m->wpmapped)
7091 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
7092
7093 if (m->dirty) vm_pageout_cluster_dirtied++;
7094 else vm_pageout_cluster_cleaned++;
7095 if (m->wanted) vm_pageout_cluster_collisions++;
7096 #endif
7097 /*
7098 * It is part of the semantics of COPYOUT_FROM
7099 * UPLs that a commit implies cache sync
7100 * between the vm page and the backing store;
7101 * this can be used to strip the precious bit
7102 * as well as clean
7103 */
7104 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
7105 m->precious = FALSE;
7106
7107 if (flags & UPL_COMMIT_SET_DIRTY) {
7108 SET_PAGE_DIRTY(m, FALSE);
7109 } else {
7110 m->dirty = FALSE;
7111 }
7112
7113 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7114 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7115 pgpgout_count++;
7116
7117 VM_STAT_INCR(pageouts);
7118 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
7119
7120 dwp->dw_mask |= DW_enqueue_cleaned;
7121 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7122 } else if (should_be_throttled == TRUE && (m->vm_page_q_state == VM_PAGE_NOT_ON_Q)) {
7123 /*
7124 * page coming back in from being 'frozen'...
7125 * it was dirty before it was frozen, so keep it so
7126 * the vm_page_activate will notice that it really belongs
7127 * on the throttle queue and put it there
7128 */
7129 SET_PAGE_DIRTY(m, FALSE);
7130 dwp->dw_mask |= DW_vm_page_activate;
7131
7132 } else {
7133 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && (m->vm_page_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
7134 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7135 clear_refmod |= VM_MEM_REFERENCED;
7136 } else if ( !VM_PAGE_PAGEABLE(m)) {
7137
7138 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7139 dwp->dw_mask |= DW_vm_page_speculate;
7140 else if (m->reference)
7141 dwp->dw_mask |= DW_vm_page_activate;
7142 else {
7143 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7144 clear_refmod |= VM_MEM_REFERENCED;
7145 }
7146 }
7147 }
7148 if (upl->flags & UPL_ACCESS_BLOCKED) {
7149 /*
7150 * We blocked access to the pages in this UPL.
7151 * Clear the "busy" bit on this page before we
7152 * wake up any waiter.
7153 */
7154 dwp->dw_mask |= DW_clear_busy;
7155 }
7156 /*
7157 * Wake up any thread waiting for this page's cleaning to be done.
7158 */
7159 dwp->dw_mask |= DW_PAGE_WAKEUP;
7160
7161 commit_next_page:
7162 if (clear_refmod)
7163 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
7164
7165 target_offset += PAGE_SIZE_64;
7166 xfer_size -= PAGE_SIZE;
7167 entry++;
7168
7169 if (dwp->dw_mask) {
7170 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7171 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7172
7173 if (dw_count >= dw_limit) {
7174 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7175
7176 dwp = &dw_array[0];
7177 dw_count = 0;
7178 }
7179 } else {
7180 if (dwp->dw_mask & DW_clear_busy)
7181 m->busy = FALSE;
7182
7183 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7184 PAGE_WAKEUP(m);
7185 }
7186 }
7187 }
7188 if (dw_count)
7189 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7190
7191 if (fast_path_possible) {
7192
7193 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7194 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7195
7196 if (local_queue_count || unwired_count) {
7197
7198 if (local_queue_count) {
7199 vm_page_t first_target;
7200 vm_page_queue_head_t *target_queue;
7201
7202 if (throttle_page)
7203 target_queue = &vm_page_queue_throttled;
7204 else {
7205 if (flags & UPL_COMMIT_INACTIVATE) {
7206 if (shadow_object->internal)
7207 target_queue = &vm_page_queue_anonymous;
7208 else
7209 target_queue = &vm_page_queue_inactive;
7210 } else
7211 target_queue = &vm_page_queue_active;
7212 }
7213 /*
7214 * Transfer the entire local queue to a regular LRU page queue.
7215 */
7216 vm_page_lockspin_queues();
7217
7218 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7219
7220 if (vm_page_queue_empty(target_queue))
7221 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7222 else
7223 first_target->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7224
7225 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7226 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7227 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
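/*
 * The pointer updates above splice the entire local list in at the
 * head of target_queue in constant time, under a single page-queues
 * lock hold; the global page counts are then adjusted in bulk below.
 */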
7228
7229 /*
7230 * Adjust the global page counts.
7231 */
7232 if (throttle_page) {
7233 vm_page_throttled_count += local_queue_count;
7234 } else {
7235 if (flags & UPL_COMMIT_INACTIVATE) {
7236 if (shadow_object->internal)
7237 vm_page_anonymous_count += local_queue_count;
7238 vm_page_inactive_count += local_queue_count;
7239
7240 token_new_pagecount += local_queue_count;
7241 } else
7242 vm_page_active_count += local_queue_count;
7243
7244 if (shadow_object->internal)
7245 vm_page_pageable_internal_count += local_queue_count;
7246 else
7247 vm_page_pageable_external_count += local_queue_count;
7248 }
7249 } else {
7250 vm_page_lockspin_queues();
7251 }
7252 if (unwired_count) {
7253 vm_page_wire_count -= unwired_count;
7254 VM_CHECK_MEMORYSTATUS;
7255 }
7256 vm_page_unlock_queues();
7257
7258 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7259 }
7260 }
7261 occupied = 1;
7262
7263 if (upl->flags & UPL_DEVICE_MEMORY) {
7264 occupied = 0;
7265 } else if (upl->flags & UPL_LITE) {
7266 int pg_num;
7267 int i;
7268
7269 occupied = 0;
7270
7271 if (!fast_path_full_commit) {
7272 pg_num = upl->size/PAGE_SIZE;
7273 pg_num = (pg_num + 31) >> 5;
7274
7275 for (i = 0; i < pg_num; i++) {
7276 if (lite_list[i] != 0) {
7277 occupied = 1;
7278 break;
7279 }
7280 }
7281 }
7282 } else {
7283 if (vm_page_queue_empty(&upl->map_object->memq))
7284 occupied = 0;
7285 }
7286 if (occupied == 0) {
7287 /*
7288 * If this UPL element belongs to a Vector UPL and is
7289 * empty, then this is the right function to deallocate
7290 * it. So go ahead and set the *empty variable. The flag
7291 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7292 * should be considered relevant for the Vector UPL and not
7293 * the internal UPLs.
7294 */
7295 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7296 *empty = TRUE;
7297
7298 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7299 /*
7300 * this is not a paging object
7301 * so we need to drop the paging reference
7302 * that was taken when we created the UPL
7303 * against this object
7304 */
7305 vm_object_activity_end(shadow_object);
7306 vm_object_collapse(shadow_object, 0, TRUE);
7307 } else {
7308 /*
7309 * we donated the paging reference to
7310 * the map object... vm_pageout_object_terminate
7311 * will drop this reference
7312 */
7313 }
7314 }
7315 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7316 vm_object_unlock(shadow_object);
7317 if (object != shadow_object)
7318 vm_object_unlock(object);
7319
7320 if(!isVectorUPL)
7321 upl_unlock(upl);
7322 else {
7323 /*
7324 * If we completed our operations on a UPL that is
7325 * part of a Vectored UPL and if empty is TRUE, then
7326 * we should go ahead and deallocate this UPL element.
7327 * Then we check if this was the last of the UPL elements
7328 * within that Vectored UPL. If so, set empty to TRUE
7329 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7330 * can go ahead and deallocate the Vector UPL too.
7331 */
7332 if(*empty==TRUE) {
7333 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7334 upl_deallocate(upl);
7335 }
7336 goto process_upl_to_commit;
7337 }
7338 if (pgpgout_count) {
7339 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7340 }
7341
7342 return KERN_SUCCESS;
7343 }
7344
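/*
 * Illustrative sketch only (not compiled): a minimal caller-side commit loop
 * for a plain (non-vector) UPL.  It walks the UPL in fixed, page-aligned
 * chunks; a NULL page list is legal (upl_abort_range() below does the same
 * when it redirects to upl_commit_range()).  The chunk size and the trailing
 * upl_deallocate() are assumptions made for the example, not a statement
 * about what any particular real caller does.
 */
#if 0
static kern_return_t
example_commit_whole_upl(upl_t upl)
{
	upl_offset_t	offset = 0;
	upl_size_t	chunk = 32 * PAGE_SIZE;		/* arbitrary chunk size */
	boolean_t	empty = FALSE;
	kern_return_t	kr = KERN_SUCCESS;

	while (offset < upl->size) {
		upl_size_t	this_chunk = chunk;

		if (offset + this_chunk > upl->size)
			this_chunk = upl->size - offset;

		kr = upl_commit_range(upl, offset, this_chunk,
				      0,		/* no UPL_COMMIT_* modifiers */
				      NULL, 0,		/* no page list */
				      &empty);
		if (kr != KERN_SUCCESS)
			break;
		offset += this_chunk;
	}
	upl_deallocate(upl);		/* drop the UPL once we're done with it */
	return kr;
}
#endif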
7345 kern_return_t
7346 upl_abort_range(
7347 upl_t upl,
7348 upl_offset_t offset,
7349 upl_size_t size,
7350 int error,
7351 boolean_t *empty)
7352 {
7353 upl_page_info_t *user_page_list = NULL;
7354 upl_size_t xfer_size, subupl_size = size;
7355 vm_object_t shadow_object;
7356 vm_object_t object;
7357 vm_object_offset_t target_offset;
7358 upl_offset_t subupl_offset = offset;
7359 int entry;
7360 wpl_array_t lite_list;
7361 int occupied;
7362 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7363 struct vm_page_delayed_work *dwp;
7364 int dw_count;
7365 int dw_limit;
7366 int isVectorUPL = 0;
7367 upl_t vector_upl = NULL;
7368
7369 *empty = FALSE;
7370
7371 if (upl == UPL_NULL)
7372 return KERN_INVALID_ARGUMENT;
7373
7374 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7375 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7376
7377 if((isVectorUPL = vector_upl_is_valid(upl))) {
7378 vector_upl = upl;
7379 upl_lock(vector_upl);
7380 }
7381 else
7382 upl_lock(upl);
7383
7384 process_upl_to_abort:
7385 if(isVectorUPL) {
7386 size = subupl_size;
7387 offset = subupl_offset;
7388 if(size == 0) {
7389 upl_unlock(vector_upl);
7390 return KERN_SUCCESS;
7391 }
7392 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7393 if(upl == NULL) {
7394 upl_unlock(vector_upl);
7395 return KERN_FAILURE;
7396 }
7397 subupl_size -= size;
7398 subupl_offset += size;
7399 }
7400
7401 *empty = FALSE;
7402
7403 #if UPL_DEBUG
7404 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7405 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7406
7407 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7408 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7409 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7410
7411 upl->upl_commit_index++;
7412 }
7413 #endif
7414 if (upl->flags & UPL_DEVICE_MEMORY)
7415 xfer_size = 0;
7416 else if ((offset + size) <= upl->size)
7417 xfer_size = size;
7418 else {
7419 if(!isVectorUPL)
7420 upl_unlock(upl);
7421 else {
7422 upl_unlock(vector_upl);
7423 }
7424
7425 return KERN_FAILURE;
7426 }
7427 if (upl->flags & UPL_INTERNAL) {
7428 lite_list = (wpl_array_t)
7429 ((((uintptr_t)upl) + sizeof(struct upl))
7430 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7431
7432 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7433 } else {
7434 lite_list = (wpl_array_t)
7435 (((uintptr_t)upl) + sizeof(struct upl));
7436 }
7437 object = upl->map_object;
7438
7439 if (upl->flags & UPL_SHADOWED) {
7440 vm_object_lock(object);
7441 shadow_object = object->shadow;
7442 } else
7443 shadow_object = object;
7444
7445 entry = offset/PAGE_SIZE;
7446 target_offset = (vm_object_offset_t)offset;
7447
7448 assert(!(target_offset & PAGE_MASK));
7449 assert(!(xfer_size & PAGE_MASK));
7450
7451 if (upl->flags & UPL_KERNEL_OBJECT)
7452 vm_object_lock_shared(shadow_object);
7453 else
7454 vm_object_lock(shadow_object);
7455
7456 if (upl->flags & UPL_ACCESS_BLOCKED) {
7457 assert(shadow_object->blocked_access);
7458 shadow_object->blocked_access = FALSE;
7459 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7460 }
7461
7462 dwp = &dw_array[0];
7463 dw_count = 0;
7464 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7465
7466 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7467 panic("upl_abort_range: kernel_object being DUMPED");
7468
7469 while (xfer_size) {
7470 vm_page_t t, m;
7471 unsigned int pg_num;
7472 boolean_t needed;
7473
7474 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7475 assert(pg_num == target_offset/PAGE_SIZE);
7476
7477 needed = FALSE;
7478
7479 if (user_page_list)
7480 needed = user_page_list[pg_num].needed;
7481
7482 dwp->dw_mask = 0;
7483 m = VM_PAGE_NULL;
7484
7485 if (upl->flags & UPL_LITE) {
7486
7487 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7488 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7489
7490 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7491 m = vm_page_lookup(shadow_object, target_offset +
7492 (upl->offset - shadow_object->paging_offset));
7493 }
7494 }
7495 if (upl->flags & UPL_SHADOWED) {
7496 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7497 t->free_when_done = FALSE;
7498
7499 VM_PAGE_FREE(t);
7500
7501 if (m == VM_PAGE_NULL)
7502 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7503 }
7504 }
7505 if ((upl->flags & UPL_KERNEL_OBJECT))
7506 goto abort_next_page;
7507
7508 if (m != VM_PAGE_NULL) {
7509
7510 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7511
7512 if (m->absent) {
7513 boolean_t must_free = TRUE;
7514
7515 /*
7516 * COPYOUT = FALSE case
7517 * check for error conditions which must
7518 * be passed back to the page's customer
7519 */
7520 if (error & UPL_ABORT_RESTART) {
7521 m->restart = TRUE;
7522 m->absent = FALSE;
7523 m->unusual = TRUE;
7524 must_free = FALSE;
7525 } else if (error & UPL_ABORT_UNAVAILABLE) {
7526 m->restart = FALSE;
7527 m->unusual = TRUE;
7528 must_free = FALSE;
7529 } else if (error & UPL_ABORT_ERROR) {
7530 m->restart = FALSE;
7531 m->absent = FALSE;
7532 m->error = TRUE;
7533 m->unusual = TRUE;
7534 must_free = FALSE;
7535 }
7536 if (m->clustered && needed == FALSE) {
7537 /*
7538 * This page was a part of a speculative
7539 * read-ahead initiated by the kernel
7540 * itself. No one is expecting this
7541 * page and no one will clean up its
7542 * error state if it ever becomes valid
7543 * in the future.
7544 * We have to free it here.
7545 */
7546 must_free = TRUE;
7547 }
7548 m->cleaning = FALSE;
7549
7550 if (m->overwriting && !m->busy) {
7551 /*
7552 * this shouldn't happen since
7553 * this is an 'absent' page, but
7554 * it doesn't hurt to check for
7555 * the 'alternate' method of
7556 * stabilizing the page...
7557 * we will mark 'busy' to be cleared
7558 * in the following code which will
7559 * take care of the primary stabilization
7560 * method (i.e. setting 'busy' to TRUE)
7561 */
7562 dwp->dw_mask |= DW_vm_page_unwire;
7563 }
7564 m->overwriting = FALSE;
7565
7566 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7567
7568 if (must_free == TRUE)
7569 dwp->dw_mask |= DW_vm_page_free;
7570 else
7571 dwp->dw_mask |= DW_vm_page_activate;
7572 } else {
7573 /*
7574 * Handle the trusted pager throttle.
7575 */
7576 if (m->laundry)
7577 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7578
7579 if (upl->flags & UPL_ACCESS_BLOCKED) {
7580 /*
7581 * We blocked access to the pages in this UPL.
7582 * Clear the "busy" bit and wake up any waiter
7583 * for this page.
7584 */
7585 dwp->dw_mask |= DW_clear_busy;
7586 }
7587 if (m->overwriting) {
7588 if (m->busy)
7589 dwp->dw_mask |= DW_clear_busy;
7590 else {
7591 /*
7592 * deal with the 'alternate' method
7593 * of stabilizing the page...
7594 * we will either free the page
7595 * or mark 'busy' to be cleared
7596 * in the following code which will
7597 * take care of the primary stabilization
7598 * method (i.e. setting 'busy' to TRUE)
7599 */
7600 dwp->dw_mask |= DW_vm_page_unwire;
7601 }
7602 m->overwriting = FALSE;
7603 }
7604 m->free_when_done = FALSE;
7605 m->cleaning = FALSE;
7606
7607 if (error & UPL_ABORT_DUMP_PAGES) {
7608 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7609
7610 dwp->dw_mask |= DW_vm_page_free;
7611 } else {
7612 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7613 if (error & UPL_ABORT_REFERENCE) {
7614 /*
7615 * we've been told to explicitly
7616 * reference this page... for
7617 * file I/O, this is done by
7618 * implementing an LRU on the inactive q
7619 */
7620 dwp->dw_mask |= DW_vm_page_lru;
7621
7622 } else if ( !VM_PAGE_PAGEABLE(m))
7623 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7624 }
7625 dwp->dw_mask |= DW_PAGE_WAKEUP;
7626 }
7627 }
7628 }
7629 abort_next_page:
7630 target_offset += PAGE_SIZE_64;
7631 xfer_size -= PAGE_SIZE;
7632 entry++;
7633
7634 if (dwp->dw_mask) {
7635 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7636 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7637
7638 if (dw_count >= dw_limit) {
7639 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7640
7641 dwp = &dw_array[0];
7642 dw_count = 0;
7643 }
7644 } else {
7645 if (dwp->dw_mask & DW_clear_busy)
7646 m->busy = FALSE;
7647
7648 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7649 PAGE_WAKEUP(m);
7650 }
7651 }
7652 }
7653 if (dw_count)
7654 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7655
7656 occupied = 1;
7657
7658 if (upl->flags & UPL_DEVICE_MEMORY) {
7659 occupied = 0;
7660 } else if (upl->flags & UPL_LITE) {
7661 int pg_num;
7662 int i;
7663
7664 pg_num = upl->size/PAGE_SIZE;
7665 pg_num = (pg_num + 31) >> 5;
7666 occupied = 0;
7667
7668 for (i = 0; i < pg_num; i++) {
7669 if (lite_list[i] != 0) {
7670 occupied = 1;
7671 break;
7672 }
7673 }
7674 } else {
7675 if (vm_page_queue_empty(&upl->map_object->memq))
7676 occupied = 0;
7677 }
7678 if (occupied == 0) {
7679 /*
7680 * If this UPL element belongs to a Vector UPL and is
7681 * empty, then this is the right function to deallocate
7682 * it. So go ahead and set the *empty variable. The flag
7683 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7684 * should be considered relevant for the Vector UPL and
7685 * not the internal UPLs.
7686 */
7687 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7688 *empty = TRUE;
7689
7690 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7691 /*
7692 * this is not a paging object
7693 * so we need to drop the paging reference
7694 * that was taken when we created the UPL
7695 * against this object
7696 */
7697 vm_object_activity_end(shadow_object);
7698 vm_object_collapse(shadow_object, 0, TRUE);
7699 } else {
7700 /*
7701 * we donated the paging reference to
7702 * the map object... vm_pageout_object_terminate
7703 * will drop this reference
7704 */
7705 }
7706 }
7707 vm_object_unlock(shadow_object);
7708 if (object != shadow_object)
7709 vm_object_unlock(object);
7710
7711 if(!isVectorUPL)
7712 upl_unlock(upl);
7713 else {
7714 /*
7715 * If we completed our operations on a UPL that is
7716 * part of a Vectored UPL and if empty is TRUE, then
7717 * we should go ahead and deallocate this UPL element.
7718 * Then we check if this was the last of the UPL elements
7719 * within that Vectored UPL. If so, set empty to TRUE
7720 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7721 * can go ahead and deallocate the Vector UPL too.
7722 */
7723 if(*empty == TRUE) {
7724 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7725 upl_deallocate(upl);
7726 }
7727 goto process_upl_to_abort;
7728 }
7729
7730 return KERN_SUCCESS;
7731 }
7732
7733
7734 kern_return_t
7735 upl_abort(
7736 upl_t upl,
7737 int error)
7738 {
7739 boolean_t empty;
7740
7741 if (upl == UPL_NULL)
7742 return KERN_INVALID_ARGUMENT;
7743
7744 return upl_abort_range(upl, 0, upl->size, error, &empty);
7745 }
7746
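/*
 * Illustrative sketch only (not compiled): how a caller might abort a UPL
 * whose backing I/O failed.  UPL_ABORT_ERROR marks any still-absent pages
 * as being in error so that waiting faulters see the failure (see the
 * m->absent handling in upl_abort_range() above); pages that were already
 * valid are simply unbusied, woken and requeued.  Whether to deallocate the
 * UPL afterwards is the caller's choice; the upl_deallocate() here is an
 * assumption of the example.
 */
#if 0
static void
example_abort_failed_io(upl_t upl)
{
	(void) upl_abort(upl, UPL_ABORT_ERROR);
	upl_deallocate(upl);
}
#endif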
7747
7748 /* an option on commit should be wire */
7749 kern_return_t
7750 upl_commit(
7751 upl_t upl,
7752 upl_page_info_t *page_list,
7753 mach_msg_type_number_t count)
7754 {
7755 boolean_t empty;
7756
7757 if (upl == UPL_NULL)
7758 return KERN_INVALID_ARGUMENT;
7759
7760 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7761 }
7762
7763
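/*
 * iopl_valid_data:
 *
 * Once the I/O that backs an IO-wired UPL has filled its pages, this routine
 * turns them into ordinary valid pages.  Pages grabbed without zero-fill are
 * still busy+absent at this point; for each of them it clears "absent", marks
 * the page dirty and wired, wakes any waiters, and finally updates the object
 * and global wired-page counts.  Only plain UPL_IO_WIRE UPLs are supported,
 * as the panics below enforce.
 */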
7764 void
7765 iopl_valid_data(
7766 upl_t upl,
7767 vm_tag_t tag)
7768 {
7769 vm_object_t object;
7770 vm_offset_t offset;
7771 vm_page_t m, nxt_page = VM_PAGE_NULL;
7772 upl_size_t size;
7773 int wired_count = 0;
7774
7775 if (upl == NULL)
7776 panic("iopl_valid_data: NULL upl");
7777 if (vector_upl_is_valid(upl))
7778 panic("iopl_valid_data: vector upl");
7779 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7780 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7781
7782 object = upl->map_object;
7783
7784 if (object == kernel_object || object == compressor_object)
7785 panic("iopl_valid_data: object == kernel or compressor");
7786
7787 if (object->purgable == VM_PURGABLE_VOLATILE ||
7788 object->purgable == VM_PURGABLE_EMPTY)
7789 panic("iopl_valid_data: object %p purgable %d",
7790 object, object->purgable);
7791
7792 size = upl->size;
7793
7794 vm_object_lock(object);
7795 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7796
7797 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7798 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7799 else
7800 offset = 0 + upl->offset - object->paging_offset;
7801
7802 while (size) {
7803
7804 if (nxt_page != VM_PAGE_NULL) {
7805 m = nxt_page;
7806 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
7807 } else {
7808 m = vm_page_lookup(object, offset);
7809 offset += PAGE_SIZE;
7810
7811 if (m == VM_PAGE_NULL)
7812 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7813 }
7814 if (m->busy) {
7815 if (!m->absent)
7816 panic("iopl_valid_data: busy page w/o absent");
7817
7818 if (m->pageq.next || m->pageq.prev)
7819 panic("iopl_valid_data: busy+absent page on page queue");
7820 if (m->reusable) {
7821 panic("iopl_valid_data: %p is reusable", m);
7822 }
7823
7824 m->absent = FALSE;
7825 m->dirty = TRUE;
7826 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7827 assert(m->wire_count == 0);
7828 m->wire_count++;
7829 assert(m->wire_count);
7830 if (m->wire_count == 1) {
7831 m->vm_page_q_state = VM_PAGE_IS_WIRED;
7832 wired_count++;
7833 } else {
7834 panic("iopl_valid_data: %p already wired\n", m);
7835 }
7836
7837 PAGE_WAKEUP_DONE(m);
7838 }
7839 size -= PAGE_SIZE;
7840 }
7841 if (wired_count) {
7842
7843 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7844 assert(object->resident_page_count >= object->wired_page_count);
7845
7846 /* no need to adjust purgeable accounting for this object: */
7847 assert(object->purgable != VM_PURGABLE_VOLATILE);
7848 assert(object->purgable != VM_PURGABLE_EMPTY);
7849
7850 vm_page_lockspin_queues();
7851 vm_page_wire_count += wired_count;
7852 vm_page_unlock_queues();
7853 }
7854 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7855 vm_object_unlock(object);
7856 }
7857
7858
7859 void
7860 vm_object_set_pmap_cache_attr(
7861 vm_object_t object,
7862 upl_page_info_array_t user_page_list,
7863 unsigned int num_pages,
7864 boolean_t batch_pmap_op)
7865 {
7866 unsigned int cache_attr = 0;
7867
7868 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7869 assert(user_page_list);
7870 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7871 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7872 }
7873 }
7874
7875
7876 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7877 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int);
7878
7879
7880
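/*
 * Fast paths for vm_object_iopl_request():
 *
 * vm_object_iopl_wire_full() handles the case where the object's resident
 * pages exactly cover the request: it walks the object's page list once,
 * wiring each page and filling in the lite list / page list, and returns
 * FALSE (sending the caller down the slow path) as soon as it meets a page
 * it can't take as-is (busy, absent, in error, being cleaned, etc.).
 *
 * vm_object_iopl_wire_empty() handles an object with no resident pages at
 * all: it grabs fresh pages (zero-filled unless UPL_NOZEROFILL/UPL_NOZEROFILLIO
 * was passed), inserts them, wires the ones that were actually zero-filled
 * (pages left absent for I/O are wired later, typically in iopl_valid_data()),
 * and defers the purgeable-ledger credit until the end.
 */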
7881 boolean_t
7882 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7883 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7884 {
7885 vm_page_t dst_page;
7886 unsigned int entry;
7887 int page_count;
7888 int delayed_unlock = 0;
7889 boolean_t retval = TRUE;
7890 ppnum_t phys_page;
7891
7892 vm_object_lock_assert_exclusive(object);
7893 assert(object->purgable != VM_PURGABLE_VOLATILE);
7894 assert(object->purgable != VM_PURGABLE_EMPTY);
7895 assert(object->pager == NULL);
7896 assert(object->copy == NULL);
7897 assert(object->shadow == NULL);
7898
7899 page_count = object->resident_page_count;
7900 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7901
7902 vm_page_lock_queues();
7903
7904 while (page_count--) {
7905
7906 if (dst_page->busy ||
7907 dst_page->fictitious ||
7908 dst_page->absent ||
7909 dst_page->error ||
7910 dst_page->cleaning ||
7911 dst_page->restart ||
7912 dst_page->laundry) {
7913 retval = FALSE;
7914 goto done;
7915 }
7916 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7917 retval = FALSE;
7918 goto done;
7919 }
7920 dst_page->reference = TRUE;
7921
7922 vm_page_wire(dst_page, tag, FALSE);
7923
7924 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7925 SET_PAGE_DIRTY(dst_page, FALSE);
7926 }
7927 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
7928 assert(entry >= 0 && entry < object->resident_page_count);
7929 lite_list[entry>>5] |= 1 << (entry & 31);
7930
7931 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7932
7933 if (phys_page > upl->highest_page)
7934 upl->highest_page = phys_page;
7935
7936 if (user_page_list) {
7937 user_page_list[entry].phys_addr = phys_page;
7938 user_page_list[entry].absent = dst_page->absent;
7939 user_page_list[entry].dirty = dst_page->dirty;
7940 user_page_list[entry].free_when_done = dst_page->free_when_done;
7941 user_page_list[entry].precious = dst_page->precious;
7942 user_page_list[entry].device = FALSE;
7943 user_page_list[entry].speculative = FALSE;
7944 user_page_list[entry].cs_validated = FALSE;
7945 user_page_list[entry].cs_tainted = FALSE;
7946 user_page_list[entry].cs_nx = FALSE;
7947 user_page_list[entry].needed = FALSE;
7948 user_page_list[entry].mark = FALSE;
7949 }
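		/*
		 * Periodically yield the page-queue lock so this walk over a
		 * potentially large object doesn't hold it for too long;
		 * 256 pages is the batch processed between yields here.
		 */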
7950 if (delayed_unlock++ > 256) {
7951 delayed_unlock = 0;
7952 lck_mtx_yield(&vm_page_queue_lock);
7953
7954 VM_CHECK_MEMORYSTATUS;
7955 }
7956 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->listq);
7957 }
7958 done:
7959 vm_page_unlock_queues();
7960
7961 VM_CHECK_MEMORYSTATUS;
7962
7963 return (retval);
7964 }
7965
7966
7967 kern_return_t
7968 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7969 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset, int page_count)
7970 {
7971 vm_page_t dst_page;
7972 boolean_t no_zero_fill = FALSE;
7973 int interruptible;
7974 int pages_wired = 0;
7975 int pages_inserted = 0;
7976 int entry = 0;
7977 uint64_t delayed_ledger_update = 0;
7978 kern_return_t ret = KERN_SUCCESS;
7979 int grab_options;
7980 ppnum_t phys_page;
7981
7982 vm_object_lock_assert_exclusive(object);
7983 assert(object->purgable != VM_PURGABLE_VOLATILE);
7984 assert(object->purgable != VM_PURGABLE_EMPTY);
7985 assert(object->pager == NULL);
7986 assert(object->copy == NULL);
7987 assert(object->shadow == NULL);
7988
7989 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7990 interruptible = THREAD_ABORTSAFE;
7991 else
7992 interruptible = THREAD_UNINT;
7993
7994 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7995 no_zero_fill = TRUE;
7996
7997 grab_options = 0;
7998 #if CONFIG_SECLUDED_MEMORY
7999 if (object->can_grab_secluded) {
8000 grab_options |= VM_PAGE_GRAB_SECLUDED;
8001 }
8002 #endif /* CONFIG_SECLUDED_MEMORY */
8003
8004 while (page_count--) {
8005
8006 while ((dst_page = vm_page_grab_options(grab_options))
8007 == VM_PAGE_NULL) {
8008
8009 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
8010
8011 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8012
8013 if (vm_page_wait(interruptible) == FALSE) {
8014 /*
8015 * interrupted case
8016 */
8017 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8018
8019 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8020
8021 ret = MACH_SEND_INTERRUPTED;
8022 goto done;
8023 }
8024 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8025
8026 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8027 }
8028 if (no_zero_fill == FALSE)
8029 vm_page_zero_fill(dst_page);
8030 else
8031 dst_page->absent = TRUE;
8032
8033 dst_page->reference = TRUE;
8034
8035 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8036 SET_PAGE_DIRTY(dst_page, FALSE);
8037 }
8038 if (dst_page->absent == FALSE) {
8039 assert(dst_page->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8040 assert(dst_page->wire_count == 0);
8041 dst_page->wire_count++;
8042 dst_page->vm_page_q_state = VM_PAGE_IS_WIRED;
8043 assert(dst_page->wire_count);
8044 pages_wired++;
8045 PAGE_WAKEUP_DONE(dst_page);
8046 }
8047 pages_inserted++;
8048
8049 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8050
8051 lite_list[entry>>5] |= 1 << (entry & 31);
8052
8053 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8054
8055 if (phys_page > upl->highest_page)
8056 upl->highest_page = phys_page;
8057
8058 if (user_page_list) {
8059 user_page_list[entry].phys_addr = phys_page;
8060 user_page_list[entry].absent = dst_page->absent;
8061 user_page_list[entry].dirty = dst_page->dirty;
8062 user_page_list[entry].free_when_done = FALSE;
8063 user_page_list[entry].precious = FALSE;
8064 user_page_list[entry].device = FALSE;
8065 user_page_list[entry].speculative = FALSE;
8066 user_page_list[entry].cs_validated = FALSE;
8067 user_page_list[entry].cs_tainted = FALSE;
8068 user_page_list[entry].cs_nx = FALSE;
8069 user_page_list[entry].needed = FALSE;
8070 user_page_list[entry].mark = FALSE;
8071 }
8072 entry++;
8073 *dst_offset += PAGE_SIZE_64;
8074 }
8075 done:
8076 if (pages_wired) {
8077 vm_page_lockspin_queues();
8078 vm_page_wire_count += pages_wired;
8079 vm_page_unlock_queues();
8080 }
8081 if (pages_inserted) {
8082 if (object->internal) {
8083 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8084 } else {
8085 OSAddAtomic(pages_inserted, &vm_page_external_count);
8086 }
8087 }
8088 if (delayed_ledger_update) {
8089 task_t owner;
8090
8091 owner = object->vo_purgeable_owner;
8092 assert(owner);
8093
8094 /* more non-volatile bytes */
8095 ledger_credit(owner->ledger,
8096 task_ledgers.purgeable_nonvolatile,
8097 delayed_ledger_update);
8098 /* more footprint */
8099 ledger_credit(owner->ledger,
8100 task_ledgers.phys_footprint,
8101 delayed_ledger_update);
8102 }
8103 return (ret);
8104 }
8105
8106
8107 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8108
8109
8110 kern_return_t
8111 vm_object_iopl_request(
8112 vm_object_t object,
8113 vm_object_offset_t offset,
8114 upl_size_t size,
8115 upl_t *upl_ptr,
8116 upl_page_info_array_t user_page_list,
8117 unsigned int *page_list_count,
8118 upl_control_flags_t cntrl_flags,
8119 vm_tag_t tag)
8120 {
8121 vm_page_t dst_page;
8122 vm_object_offset_t dst_offset;
8123 upl_size_t xfer_size;
8124 upl_t upl = NULL;
8125 unsigned int entry;
8126 wpl_array_t lite_list = NULL;
8127 int no_zero_fill = FALSE;
8128 unsigned int size_in_pages;
8129 u_int32_t psize;
8130 kern_return_t ret;
8131 vm_prot_t prot;
8132 struct vm_object_fault_info fault_info;
8133 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8134 struct vm_page_delayed_work *dwp;
8135 int dw_count;
8136 int dw_limit;
8137 int dw_index;
8138 boolean_t caller_lookup;
8139 int io_tracking_flag = 0;
8140 int interruptible;
8141 ppnum_t phys_page;
8142
8143 boolean_t set_cache_attr_needed = FALSE;
8144 boolean_t free_wired_pages = FALSE;
8145 boolean_t fast_path_empty_req = FALSE;
8146 boolean_t fast_path_full_req = FALSE;
8147
8148 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8149 /*
8150 * For forward compatibility's sake,
8151 * reject any unknown flag.
8152 */
8153 return KERN_INVALID_VALUE;
8154 }
8155 if (vm_lopage_needed == FALSE)
8156 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8157
8158 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8159 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8160 return KERN_INVALID_VALUE;
8161
8162 if (object->phys_contiguous) {
8163 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8164 return KERN_INVALID_ADDRESS;
8165
8166 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8167 return KERN_INVALID_ADDRESS;
8168 }
8169 }
8170 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8171 no_zero_fill = TRUE;
8172
8173 if (cntrl_flags & UPL_COPYOUT_FROM)
8174 prot = VM_PROT_READ;
8175 else
8176 prot = VM_PROT_READ | VM_PROT_WRITE;
8177
8178 if ((!object->internal) && (object->paging_offset != 0))
8179 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8180
8181 #if CONFIG_IOSCHED || UPL_DEBUG
8182 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8183 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8184 #endif
8185
8186 #if CONFIG_IOSCHED
8187 if (object->io_tracking) {
8188 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8189 if (object != kernel_object)
8190 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8191 }
8192 #endif
8193
8194 if (object->phys_contiguous)
8195 psize = PAGE_SIZE;
8196 else
8197 psize = size;
8198
8199 if (cntrl_flags & UPL_SET_INTERNAL) {
8200 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8201
8202 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8203 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8204 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8205 if (size == 0) {
8206 user_page_list = NULL;
8207 lite_list = NULL;
8208 }
8209 } else {
8210 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8211
8212 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8213 if (size == 0) {
8214 lite_list = NULL;
8215 }
8216 }
8217 if (user_page_list)
8218 user_page_list[0].device = FALSE;
8219 *upl_ptr = upl;
8220
8221 upl->map_object = object;
8222 upl->size = size;
8223
8224 size_in_pages = size / PAGE_SIZE;
8225
8226 if (object == kernel_object &&
8227 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8228 upl->flags |= UPL_KERNEL_OBJECT;
8229 #if UPL_DEBUG
8230 vm_object_lock(object);
8231 #else
8232 vm_object_lock_shared(object);
8233 #endif
8234 } else {
8235 vm_object_lock(object);
8236 vm_object_activity_begin(object);
8237 }
8238 /*
8239 * paging in progress also protects the paging_offset
8240 */
8241 upl->offset = offset + object->paging_offset;
8242
8243 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8244 /*
8245 * The user requested that access to the pages in this UPL
8246 * be blocked until the UPL is committed or aborted.
8247 */
8248 upl->flags |= UPL_ACCESS_BLOCKED;
8249 }
8250
8251 #if CONFIG_IOSCHED || UPL_DEBUG
8252 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8253 vm_object_activity_begin(object);
8254 queue_enter(&object->uplq, upl, upl_t, uplq);
8255 }
8256 #endif
8257
8258 if (object->phys_contiguous) {
8259
8260 if (upl->flags & UPL_ACCESS_BLOCKED) {
8261 assert(!object->blocked_access);
8262 object->blocked_access = TRUE;
8263 }
8264
8265 vm_object_unlock(object);
8266
8267 /*
8268 * don't need any shadow mappings for this one
8269 * since it is already I/O memory
8270 */
8271 upl->flags |= UPL_DEVICE_MEMORY;
8272
8273 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8274
8275 if (user_page_list) {
8276 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8277 user_page_list[0].device = TRUE;
8278 }
8279 if (page_list_count != NULL) {
8280 if (upl->flags & UPL_INTERNAL)
8281 *page_list_count = 0;
8282 else
8283 *page_list_count = 1;
8284 }
8285 return KERN_SUCCESS;
8286 }
8287 if (object != kernel_object && object != compressor_object) {
8288 /*
8289 * Protect user space from future COW operations
8290 */
8291 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8292 if (!object->true_share &&
8293 vm_object_tracking_inited) {
8294 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8295 int num = 0;
8296
8297 num = OSBacktrace(bt,
8298 VM_OBJECT_TRACKING_BTDEPTH);
8299 btlog_add_entry(vm_object_tracking_btlog,
8300 object,
8301 VM_OBJECT_TRACKING_OP_TRUESHARE,
8302 bt,
8303 num);
8304 }
8305 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8306
8307 vm_object_lock_assert_exclusive(object);
8308 object->true_share = TRUE;
8309
8310 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8311 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8312 }
8313
8314 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8315 object->copy != VM_OBJECT_NULL) {
8316 /*
8317 * Honor copy-on-write obligations
8318 *
8319 * The caller is gathering these pages and
8320 * might modify their contents. We need to
8321 * make sure that the copy object has its own
8322 * private copies of these pages before we let
8323 * the caller modify them.
8324 *
8325 * NOTE: someone else could map the original object
8326 * after we've done this copy-on-write here, and they
8327 * could then see an inconsistent picture of the memory
8328 * while it's being modified via the UPL. To prevent this,
8329 * we would have to block access to these pages until the
8330 * UPL is released. We could use the UPL_BLOCK_ACCESS
8331 * code path for that...
8332 */
8333 vm_object_update(object,
8334 offset,
8335 size,
8336 NULL,
8337 NULL,
8338 FALSE, /* should_return */
8339 MEMORY_OBJECT_COPY_SYNC,
8340 VM_PROT_NO_CHANGE);
8341 #if DEVELOPMENT || DEBUG
8342 iopl_cow++;
8343 iopl_cow_pages += size >> PAGE_SHIFT;
8344 #endif
8345 }
8346 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8347 object->purgable != VM_PURGABLE_VOLATILE &&
8348 object->purgable != VM_PURGABLE_EMPTY &&
8349 object->copy == NULL &&
8350 size == object->vo_size &&
8351 offset == 0 &&
8352 object->shadow == NULL &&
8353 object->pager == NULL)
8354 {
8355 if (object->resident_page_count == size_in_pages)
8356 {
8357 assert(object != compressor_object);
8358 assert(object != kernel_object);
8359 fast_path_full_req = TRUE;
8360 }
8361 else if (object->resident_page_count == 0)
8362 {
8363 assert(object != compressor_object);
8364 assert(object != kernel_object);
8365 fast_path_empty_req = TRUE;
8366 set_cache_attr_needed = TRUE;
8367 }
8368 }
8369
8370 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8371 interruptible = THREAD_ABORTSAFE;
8372 else
8373 interruptible = THREAD_UNINT;
8374
8375 entry = 0;
8376
8377 xfer_size = size;
8378 dst_offset = offset;
8379 dw_count = 0;
8380
8381 if (fast_path_full_req) {
8382
8383 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8384 goto finish;
8385 /*
8386 * we couldn't complete the processing of this request on the fast path
8387 * so fall through to the slow path and finish up
8388 */
8389
8390 } else if (fast_path_empty_req) {
8391
8392 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8393 ret = KERN_MEMORY_ERROR;
8394 goto return_err;
8395 }
8396 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages);
8397
8398 if (ret) {
8399 free_wired_pages = TRUE;
8400 goto return_err;
8401 }
8402 goto finish;
8403 }
8404
8405 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8406 fault_info.user_tag = 0;
8407 fault_info.lo_offset = offset;
8408 fault_info.hi_offset = offset + xfer_size;
8409 fault_info.no_cache = FALSE;
8410 fault_info.stealth = FALSE;
8411 fault_info.io_sync = FALSE;
8412 fault_info.cs_bypass = FALSE;
8413 fault_info.mark_zf_absent = TRUE;
8414 fault_info.interruptible = interruptible;
8415 fault_info.batch_pmap_op = TRUE;
8416
8417 dwp = &dw_array[0];
8418 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8419
8420 while (xfer_size) {
8421 vm_fault_return_t result;
8422
8423 dwp->dw_mask = 0;
8424
8425 if (fast_path_full_req) {
8426 /*
8427 * if we get here, it means that we ran into a page
8428 * state we couldn't handle in the fast path and
8429 * bailed out to the slow path... since the order
8430 * we look at pages is different between the 2 paths,
8431 * the following check is needed to determine whether
8432 * this page was already processed in the fast path
8433 */
8434 if (lite_list[entry>>5] & (1 << (entry & 31)))
8435 goto skip_page;
8436 }
8437 dst_page = vm_page_lookup(object, dst_offset);
8438
8439 if (dst_page == VM_PAGE_NULL ||
8440 dst_page->busy ||
8441 dst_page->error ||
8442 dst_page->restart ||
8443 dst_page->absent ||
8444 dst_page->fictitious) {
8445
8446 if (object == kernel_object)
8447 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8448 if (object == compressor_object)
8449 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8450
8451 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8452 ret = KERN_MEMORY_ERROR;
8453 goto return_err;
8454 }
8455 set_cache_attr_needed = TRUE;
8456
8457 /*
8458 * We just looked up the page and the result remains valid
8459 * until the object lock is released, so send it to
8460 * vm_fault_page() (as "dst_page"), to avoid having to
8461 * look it up again there.
8462 */
8463 caller_lookup = TRUE;
8464
8465 do {
8466 vm_page_t top_page;
8467 kern_return_t error_code;
8468
8469 fault_info.cluster_size = xfer_size;
8470
8471 vm_object_paging_begin(object);
8472
8473 result = vm_fault_page(object, dst_offset,
8474 prot | VM_PROT_WRITE, FALSE,
8475 caller_lookup,
8476 &prot, &dst_page, &top_page,
8477 (int *)0,
8478 &error_code, no_zero_fill,
8479 FALSE, &fault_info);
8480
8481 /* our lookup is no longer valid at this point */
8482 caller_lookup = FALSE;
8483
8484 switch (result) {
8485
8486 case VM_FAULT_SUCCESS:
8487
8488 if ( !dst_page->absent) {
8489 PAGE_WAKEUP_DONE(dst_page);
8490 } else {
8491 /*
8492 * we only get back an absent page if we
8493 * requested that it not be zero-filled
8494 * because we are about to fill it via I/O
8495 *
8496 * absent pages should be left BUSY
8497 * to prevent them from being faulted
8498 * into an address space before we've
8499 * had a chance to complete the I/O on
8500 * them since they may contain info that
8501 * shouldn't be seen by the faulting task
8502 */
8503 }
8504 /*
8505 * Release paging references and
8506 * top-level placeholder page, if any.
8507 */
8508 if (top_page != VM_PAGE_NULL) {
8509 vm_object_t local_object;
8510
8511 local_object = VM_PAGE_OBJECT(top_page);
8512
8513 /*
8514 * comparing 2 packed pointers
8515 */
8516 if (top_page->vm_page_object != dst_page->vm_page_object) {
8517 vm_object_lock(local_object);
8518 VM_PAGE_FREE(top_page);
8519 vm_object_paging_end(local_object);
8520 vm_object_unlock(local_object);
8521 } else {
8522 VM_PAGE_FREE(top_page);
8523 vm_object_paging_end(local_object);
8524 }
8525 }
8526 vm_object_paging_end(object);
8527 break;
8528
8529 case VM_FAULT_RETRY:
8530 vm_object_lock(object);
8531 break;
8532
8533 case VM_FAULT_MEMORY_SHORTAGE:
8534 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8535
8536 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8537
8538 if (vm_page_wait(interruptible)) {
8539 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8540
8541 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8542 vm_object_lock(object);
8543
8544 break;
8545 }
8546 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8547
8548 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8549
8550 /* fall thru */
8551
8552 case VM_FAULT_INTERRUPTED:
8553 error_code = MACH_SEND_INTERRUPTED;
8554 case VM_FAULT_MEMORY_ERROR:
8555 memory_error:
8556 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8557
8558 vm_object_lock(object);
8559 goto return_err;
8560
8561 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8562 /* success but no page: fail */
8563 vm_object_paging_end(object);
8564 vm_object_unlock(object);
8565 goto memory_error;
8566
8567 default:
8568 panic("vm_object_iopl_request: unexpected error"
8569 " 0x%x from vm_fault_page()\n", result);
8570 }
8571 } while (result != VM_FAULT_SUCCESS);
8572
8573 }
8574 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8575
8576 if (upl->flags & UPL_KERNEL_OBJECT)
8577 goto record_phys_addr;
8578
8579 if (dst_page->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8580 dst_page->busy = TRUE;
8581 goto record_phys_addr;
8582 }
8583
8584 if (dst_page->cleaning) {
8585 /*
8586 * Someone else is cleaning this page in place.
8587 * In theory, we should be able to proceed and use this
8588 * page, but they'll probably end up clearing the "busy"
8589 * bit on it in upl_commit_range() even though they didn't
8590 * set it, which would clear our "busy" bit and open
8591 * us to race conditions.
8592 * We'd better wait for the cleaning to complete and
8593 * then try again.
8594 */
8595 vm_object_iopl_request_sleep_for_cleaning++;
8596 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8597 continue;
8598 }
8599 if (dst_page->laundry)
8600 vm_pageout_steal_laundry(dst_page, FALSE);
8601
8602 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8603 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8604 vm_page_t low_page;
8605 int refmod;
8606
8607 /*
8608 * support devices that can't DMA above 32 bits
8609 * by substituting pages from a pool of low address
8610 * memory for any pages we find above the 4G mark
8611 * can't substitute if the page is already wired because
8612 * we don't know whether that physical address has been
8613 * handed out to some other 64 bit capable DMA device to use
8614 */
8615 if (VM_PAGE_WIRED(dst_page)) {
8616 ret = KERN_PROTECTION_FAILURE;
8617 goto return_err;
8618 }
8619 low_page = vm_page_grablo();
8620
8621 if (low_page == VM_PAGE_NULL) {
8622 ret = KERN_RESOURCE_SHORTAGE;
8623 goto return_err;
8624 }
8625 /*
8626 * from here until the vm_page_replace completes
8627 * we mustn't drop the object lock... we don't
8628 * want anyone refaulting this page in and using
8629 * it after we disconnect it... we want the fault
8630 * to find the new page being substituted.
8631 */
8632 if (dst_page->pmapped)
8633 refmod = pmap_disconnect(phys_page);
8634 else
8635 refmod = 0;
8636
8637 if (!dst_page->absent)
8638 vm_page_copy(dst_page, low_page);
8639
8640 low_page->reference = dst_page->reference;
8641 low_page->dirty = dst_page->dirty;
8642 low_page->absent = dst_page->absent;
8643
8644 if (refmod & VM_MEM_REFERENCED)
8645 low_page->reference = TRUE;
8646 if (refmod & VM_MEM_MODIFIED) {
8647 SET_PAGE_DIRTY(low_page, FALSE);
8648 }
8649
8650 vm_page_replace(low_page, object, dst_offset);
8651
8652 dst_page = low_page;
8653 /*
8654 * vm_page_grablo returned the page marked
8655 * BUSY... we don't need a PAGE_WAKEUP_DONE
8656 * here, because we've never dropped the object lock
8657 */
8658 if ( !dst_page->absent)
8659 dst_page->busy = FALSE;
8660
8661 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8662 }
8663 if ( !dst_page->busy)
8664 dwp->dw_mask |= DW_vm_page_wire;
8665
8666 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8667 /*
8668 * Mark the page "busy" to block any future page fault
8669 * on this page in addition to wiring it.
8670 * We'll also remove the mapping
8671 * of all these pages before leaving this routine.
8672 */
8673 assert(!dst_page->fictitious);
8674 dst_page->busy = TRUE;
8675 }
8676 /*
8677 * expect the page to be used
8678 * page queues lock must be held to set 'reference'
8679 */
8680 dwp->dw_mask |= DW_set_reference;
8681
8682 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8683 SET_PAGE_DIRTY(dst_page, TRUE);
8684 }
8685 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8686 pmap_sync_page_attributes_phys(phys_page);
8687 dst_page->written_by_kernel = FALSE;
8688 }
8689
8690 record_phys_addr:
8691 if (dst_page->busy)
8692 upl->flags |= UPL_HAS_BUSY;
8693
8694 lite_list[entry>>5] |= 1 << (entry & 31);
8695
8696 if (phys_page > upl->highest_page)
8697 upl->highest_page = phys_page;
8698
8699 if (user_page_list) {
8700 user_page_list[entry].phys_addr = phys_page;
8701 user_page_list[entry].free_when_done = dst_page->free_when_done;
8702 user_page_list[entry].absent = dst_page->absent;
8703 user_page_list[entry].dirty = dst_page->dirty;
8704 user_page_list[entry].precious = dst_page->precious;
8705 user_page_list[entry].device = FALSE;
8706 user_page_list[entry].needed = FALSE;
8707 if (dst_page->clustered == TRUE)
8708 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8709 else
8710 user_page_list[entry].speculative = FALSE;
8711 user_page_list[entry].cs_validated = dst_page->cs_validated;
8712 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
8713 user_page_list[entry].cs_nx = dst_page->cs_nx;
8714 user_page_list[entry].mark = FALSE;
8715 }
8716 if (object != kernel_object && object != compressor_object) {
8717 /*
8718 * someone is explicitly grabbing this page...
8719 * update clustered and speculative state
8720 *
8721 */
8722 if (dst_page->clustered)
8723 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8724 }
8725 skip_page:
8726 entry++;
8727 dst_offset += PAGE_SIZE_64;
8728 xfer_size -= PAGE_SIZE;
8729
8730 if (dwp->dw_mask) {
8731 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8732
8733 if (dw_count >= dw_limit) {
8734 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8735
8736 dwp = &dw_array[0];
8737 dw_count = 0;
8738 }
8739 }
8740 }
8741 assert(entry == size_in_pages);
8742
8743 if (dw_count)
8744 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8745 finish:
8746 if (user_page_list && set_cache_attr_needed == TRUE)
8747 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8748
8749 if (page_list_count != NULL) {
8750 if (upl->flags & UPL_INTERNAL)
8751 *page_list_count = 0;
8752 else if (*page_list_count > size_in_pages)
8753 *page_list_count = size_in_pages;
8754 }
8755 vm_object_unlock(object);
8756
8757 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8758 /*
8759 * We've marked all the pages "busy" so that future
8760 * page faults will block.
8761 * Now remove the mapping for these pages, so that they
8762 * can't be accessed without causing a page fault.
8763 */
8764 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8765 PMAP_NULL, 0, VM_PROT_NONE);
8766 assert(!object->blocked_access);
8767 object->blocked_access = TRUE;
8768 }
8769
8770 return KERN_SUCCESS;
8771
8772 return_err:
8773 dw_index = 0;
8774
8775 for (; offset < dst_offset; offset += PAGE_SIZE) {
8776 boolean_t need_unwire;
8777
8778 dst_page = vm_page_lookup(object, offset);
8779
8780 if (dst_page == VM_PAGE_NULL)
8781 panic("vm_object_iopl_request: Wired page missing. \n");
8782
8783 /*
8784 * if we've already processed this page in an earlier
8785 * dw_do_work, we need to undo the wiring... we will
8786 * leave the dirty and reference bits on if they
8787 * were set, since we don't have a good way of knowing
8788 * what the previous state was and we won't get here
8789 * under any normal circumstances... we will always
8790 * clear BUSY and wakeup any waiters via vm_page_free
8791 * or PAGE_WAKEUP_DONE
8792 */
8793 need_unwire = TRUE;
8794
8795 if (dw_count) {
8796 if (dw_array[dw_index].dw_m == dst_page) {
8797 /*
8798 * still in the deferred work list
8799 * which means we haven't yet called
8800 * vm_page_wire on this page
8801 */
8802 need_unwire = FALSE;
8803
8804 dw_index++;
8805 dw_count--;
8806 }
8807 }
8808 vm_page_lock_queues();
8809
8810 if (dst_page->absent || free_wired_pages == TRUE) {
8811 vm_page_free(dst_page);
8812
8813 need_unwire = FALSE;
8814 } else {
8815 if (need_unwire == TRUE)
8816 vm_page_unwire(dst_page, TRUE);
8817
8818 PAGE_WAKEUP_DONE(dst_page);
8819 }
8820 vm_page_unlock_queues();
8821
8822 if (need_unwire == TRUE)
8823 VM_STAT_INCR(reactivations);
8824 }
8825 #if UPL_DEBUG
8826 upl->upl_state = 2;
8827 #endif
8828 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8829 vm_object_activity_end(object);
8830 vm_object_collapse(object, 0, TRUE);
8831 }
8832 vm_object_unlock(object);
8833 upl_destroy(upl);
8834
8835 return ret;
8836 }
8837
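/*
 * Illustrative sketch only (not compiled): one way a caller might use
 * vm_object_iopl_request() to wire a range of an object for I/O and then
 * release it.  The flag combination and the commit-then-deallocate teardown
 * are assumptions made for the example; real callers pick flags to match
 * their needs.  The caller is assumed to hold a reference on the object and
 * must not hold its lock (this routine takes the lock itself).
 */
#if 0
static kern_return_t
example_wire_for_io(vm_object_t object, vm_object_offset_t offset,
		    upl_size_t size, vm_tag_t tag)
{
	upl_t		upl = NULL;
	unsigned int	page_list_count = 0;
	kern_return_t	kr;

	kr = vm_object_iopl_request(object, offset, size, &upl,
				    NULL,	/* UPL_SET_INTERNAL keeps the page list inside the UPL */
				    &page_list_count,
				    UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE,
				    tag);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the wired pages described by the UPL ... */

	(void) upl_commit(upl, NULL, 0);	/* committing the full range drops the wiring */
	upl_deallocate(upl);
	return KERN_SUCCESS;
}
#endif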
8838 kern_return_t
8839 upl_transpose(
8840 upl_t upl1,
8841 upl_t upl2)
8842 {
8843 kern_return_t retval;
8844 boolean_t upls_locked;
8845 vm_object_t object1, object2;
8846
8847 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8848 return KERN_INVALID_ARGUMENT;
8849 }
8850
8851 upls_locked = FALSE;
8852
8853 /*
8854 * Since we need to lock both UPLs at the same time,
8855 * avoid deadlocks by always taking locks in the same order.
8856 */
8857 if (upl1 < upl2) {
8858 upl_lock(upl1);
8859 upl_lock(upl2);
8860 } else {
8861 upl_lock(upl2);
8862 upl_lock(upl1);
8863 }
8864 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8865
8866 object1 = upl1->map_object;
8867 object2 = upl2->map_object;
8868
8869 if (upl1->offset != 0 || upl2->offset != 0 ||
8870 upl1->size != upl2->size) {
8871 /*
8872 * We deal only with full objects, not subsets.
8873 * That's because we exchange the entire backing store info
8874 * for the objects: pager, resident pages, etc... We can't do
8875 * only part of it.
8876 */
8877 retval = KERN_INVALID_VALUE;
8878 goto done;
8879 }
8880
8881 /*
8882 * Transpose the VM objects' backing store.
8883 */
8884 retval = vm_object_transpose(object1, object2,
8885 (vm_object_size_t) upl1->size);
8886
8887 if (retval == KERN_SUCCESS) {
8888 /*
8889 * Make each UPL point to the correct VM object, i.e. the
8890 * object holding the pages that the UPL refers to...
8891 */
8892 #if CONFIG_IOSCHED || UPL_DEBUG
8893 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8894 vm_object_lock(object1);
8895 vm_object_lock(object2);
8896 }
8897 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8898 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8899 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8900 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8901 #endif
8902 upl1->map_object = object2;
8903 upl2->map_object = object1;
8904
8905 #if CONFIG_IOSCHED || UPL_DEBUG
8906 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8907 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8908 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8909 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8910 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8911 vm_object_unlock(object2);
8912 vm_object_unlock(object1);
8913 }
8914 #endif
8915 }
8916
8917 done:
8918 /*
8919 * Cleanup.
8920 */
8921 if (upls_locked) {
8922 upl_unlock(upl1);
8923 upl_unlock(upl2);
8924 upls_locked = FALSE;
8925 }
8926
8927 return retval;
8928 }
8929
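/*
 * upl_range_needed() lets a caller flag the pages of an internal UPL that it
 * actually asked for, as opposed to pages pulled in only by speculative
 * read-ahead.  upl_abort_range() consults this "needed" bit: an absent page
 * that was merely clustered in (clustered && !needed) is freed on abort,
 * since nobody is waiting for it and nobody would ever clear its error state.
 */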
8930 void
8931 upl_range_needed(
8932 upl_t upl,
8933 int index,
8934 int count)
8935 {
8936 upl_page_info_t *user_page_list;
8937 int size_in_pages;
8938
8939 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8940 return;
8941
8942 size_in_pages = upl->size / PAGE_SIZE;
8943
8944 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8945
8946 while (count-- && index < size_in_pages)
8947 user_page_list[index++].needed = TRUE;
8948 }
8949
8950
8951 /*
8952 * Reserve of virtual addresses in the kernel address space.
8953 * We need to map the physical pages in the kernel, so that we
8954 * can call the code-signing or slide routines with a kernel
8955 * virtual address. We keep this pool of pre-allocated kernel
8956 * virtual addresses so that we don't have to scan the kernel's
8957 * virtual address space each time we need to work with
8958 * a physical page.
8959 */
8960 decl_simple_lock_data(,vm_paging_lock)
8961 #define VM_PAGING_NUM_PAGES 64
8962 vm_map_offset_t vm_paging_base_address = 0;
8963 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8964 int vm_paging_max_index = 0;
8965 int vm_paging_page_waiter = 0;
8966 int vm_paging_page_waiter_total = 0;
8967 unsigned long vm_paging_no_kernel_page = 0;
8968 unsigned long vm_paging_objects_mapped = 0;
8969 unsigned long vm_paging_pages_mapped = 0;
8970 unsigned long vm_paging_objects_mapped_slow = 0;
8971 unsigned long vm_paging_pages_mapped_slow = 0;
8972
8973 void
8974 vm_paging_map_init(void)
8975 {
8976 kern_return_t kr;
8977 vm_map_offset_t page_map_offset;
8978 vm_map_entry_t map_entry;
8979
8980 assert(vm_paging_base_address == 0);
8981
8982 /*
8983 * Initialize our pool of pre-allocated kernel
8984 * virtual addresses.
8985 */
8986 page_map_offset = 0;
8987 kr = vm_map_find_space(kernel_map,
8988 &page_map_offset,
8989 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8990 0,
8991 0,
8992 VM_MAP_KERNEL_FLAGS_NONE,
8993 VM_KERN_MEMORY_NONE,
8994 &map_entry);
8995 if (kr != KERN_SUCCESS) {
8996 panic("vm_paging_map_init: kernel_map full\n");
8997 }
8998 VME_OBJECT_SET(map_entry, kernel_object);
8999 VME_OFFSET_SET(map_entry, page_map_offset);
9000 map_entry->protection = VM_PROT_NONE;
9001 map_entry->max_protection = VM_PROT_NONE;
9002 map_entry->permanent = TRUE;
9003 vm_object_reference(kernel_object);
9004 vm_map_unlock(kernel_map);
9005
9006 assert(vm_paging_base_address == 0);
9007 vm_paging_base_address = page_map_offset;
9008 }
9009
9010 /*
9011 * vm_paging_map_object:
9012 * Maps part of a VM object's pages in the kernel
9013 * virtual address space, using the pre-allocated
9014 * kernel virtual addresses, if possible.
9015 * Context:
9016 * The VM object is locked. This lock will get
9017 * dropped and re-acquired though, so the caller
9018 * must make sure the VM object is kept alive
9019 * (by holding a VM map that has a reference
9020 * on it, for example, or taking an extra reference).
9021 * The page should also be kept busy to prevent
9022 * it from being reclaimed.
9023 */
9024 kern_return_t
9025 vm_paging_map_object(
9026 vm_page_t page,
9027 vm_object_t object,
9028 vm_object_offset_t offset,
9029 vm_prot_t protection,
9030 boolean_t can_unlock_object,
9031 vm_map_size_t *size, /* IN/OUT */
9032 vm_map_offset_t *address, /* OUT */
9033 boolean_t *need_unmap) /* OUT */
9034 {
9035 kern_return_t kr;
9036 vm_map_offset_t page_map_offset;
9037 vm_map_size_t map_size;
9038 vm_object_offset_t object_offset;
9039 int i;
9040
9041 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9042 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9043 #if __x86_64__
9044 *address = (vm_map_offset_t)
9045 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
9046 PAGE_SHIFT);
9047 *need_unmap = FALSE;
9048 return KERN_SUCCESS;
9049 #elif __arm__ || __arm64__
9050 *address = (vm_map_offset_t)
9051 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
9052 *need_unmap = FALSE;
9053 return KERN_SUCCESS;
9054 #else
9055 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9056 #endif
9057
9058 assert(page->busy);
9059 /*
9060 * Use one of the pre-allocated kernel virtual addresses
9061 * and just enter the VM page in the kernel address space
9062 * at that virtual address.
9063 */
9064 simple_lock(&vm_paging_lock);
9065
9066 /*
9067 * Try and find an available kernel virtual address
9068 * from our pre-allocated pool.
9069 */
9070 page_map_offset = 0;
9071 for (;;) {
9072 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9073 if (vm_paging_page_inuse[i] == FALSE) {
9074 page_map_offset =
9075 vm_paging_base_address +
9076 (i * PAGE_SIZE);
9077 break;
9078 }
9079 }
9080 if (page_map_offset != 0) {
9081 /* found a space to map our page ! */
9082 break;
9083 }
9084
9085 if (can_unlock_object) {
9086 /*
9087 * If we can afford to unlock the VM object,
9088 * let's take the slow path now...
9089 */
9090 break;
9091 }
9092 /*
9093 * We can't afford to unlock the VM object, so
9094 * let's wait for a space to become available...
9095 */
9096 vm_paging_page_waiter_total++;
9097 vm_paging_page_waiter++;
9098 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9099 if (kr == THREAD_WAITING) {
9100 simple_unlock(&vm_paging_lock);
9101 kr = thread_block(THREAD_CONTINUE_NULL);
9102 simple_lock(&vm_paging_lock);
9103 }
9104 vm_paging_page_waiter--;
9105 /* ... and try again */
9106 }
9107
9108 if (page_map_offset != 0) {
9109 /*
9110 * We found a kernel virtual address;
9111 * map the physical page to that virtual address.
9112 */
9113 if (i > vm_paging_max_index) {
9114 vm_paging_max_index = i;
9115 }
9116 vm_paging_page_inuse[i] = TRUE;
9117 simple_unlock(&vm_paging_lock);
9118
9119 page->pmapped = TRUE;
9120
9121 /*
9122 * Keep the VM object locked over the PMAP_ENTER
9123 * and the actual use of the page by the kernel,
9124 * or this pmap mapping might get undone by a
9125 * vm_object_pmap_protect() call...
9126 */
9127 PMAP_ENTER(kernel_pmap,
9128 page_map_offset,
9129 page,
9130 protection,
9131 VM_PROT_NONE,
9132 0,
9133 TRUE,
9134 kr);
9135 assert(kr == KERN_SUCCESS);
9136 vm_paging_objects_mapped++;
9137 vm_paging_pages_mapped++;
9138 *address = page_map_offset;
9139 *need_unmap = TRUE;
9140
9141 #if KASAN
9142 kasan_notify_address(page_map_offset, PAGE_SIZE);
9143 #endif
9144
9145 /* all done and mapped, ready to use ! */
9146 return KERN_SUCCESS;
9147 }
9148
9149 /*
9150 * We ran out of pre-allocated kernel virtual
9151 * addresses. Just map the page in the kernel
9152 * the slow and regular way.
9153 */
9154 vm_paging_no_kernel_page++;
9155 simple_unlock(&vm_paging_lock);
9156 }
9157
9158 if (! can_unlock_object) {
9159 *address = 0;
9160 *size = 0;
9161 *need_unmap = FALSE;
9162 return KERN_NOT_SUPPORTED;
9163 }
9164
9165 object_offset = vm_object_trunc_page(offset);
9166 map_size = vm_map_round_page(*size,
9167 VM_MAP_PAGE_MASK(kernel_map));
9168
9169 /*
9170 * Try and map the required range of the object
9171 * in the kernel_map
9172 */
9173
9174 vm_object_reference_locked(object); /* for the map entry */
9175 vm_object_unlock(object);
9176
9177 kr = vm_map_enter(kernel_map,
9178 address,
9179 map_size,
9180 0,
9181 VM_FLAGS_ANYWHERE,
9182 VM_MAP_KERNEL_FLAGS_NONE,
9183 VM_KERN_MEMORY_NONE,
9184 object,
9185 object_offset,
9186 FALSE,
9187 protection,
9188 VM_PROT_ALL,
9189 VM_INHERIT_NONE);
9190 if (kr != KERN_SUCCESS) {
9191 *address = 0;
9192 *size = 0;
9193 *need_unmap = FALSE;
9194 vm_object_deallocate(object); /* for the map entry */
9195 vm_object_lock(object);
9196 return kr;
9197 }
9198
9199 *size = map_size;
9200
9201 /*
9202 * Enter the mapped pages in the page table now.
9203 */
9204 vm_object_lock(object);
9205 /*
9206 * VM object must be kept locked from before PMAP_ENTER()
9207 * until after the kernel is done accessing the page(s).
9208 * Otherwise, the pmap mappings in the kernel could be
9209 * undone by a call to vm_object_pmap_protect().
9210 */
9211
9212 for (page_map_offset = 0;
9213 map_size != 0;
9214 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9215
9216 page = vm_page_lookup(object, offset + page_map_offset);
9217 if (page == VM_PAGE_NULL) {
9218 printf("vm_paging_map_object: no page !?");
9219 vm_object_unlock(object);
9220 kr = vm_map_remove(kernel_map, *address, *size,
9221 VM_MAP_NO_FLAGS);
9222 assert(kr == KERN_SUCCESS);
9223 *address = 0;
9224 *size = 0;
9225 *need_unmap = FALSE;
9226 vm_object_lock(object);
9227 return KERN_MEMORY_ERROR;
9228 }
9229 page->pmapped = TRUE;
9230
9231 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9232 PMAP_ENTER(kernel_pmap,
9233 *address + page_map_offset,
9234 page,
9235 protection,
9236 VM_PROT_NONE,
9237 0,
9238 TRUE,
9239 kr);
9240 assert(kr == KERN_SUCCESS);
9241 #if KASAN
9242 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9243 #endif
9244 }
9245
9246 vm_paging_objects_mapped_slow++;
9247 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9248
9249 *need_unmap = TRUE;
9250
9251 return KERN_SUCCESS;
9252 }
9253
9254 /*
9255 * vm_paging_unmap_object:
9256 * Unmaps part of a VM object's pages from the kernel
9257 * virtual address space.
9258 * Context:
9259 * The VM object is locked. This lock may get
9260 * dropped and re-acquired, though.
9261 */
9262 void
9263 vm_paging_unmap_object(
9264 vm_object_t object,
9265 vm_map_offset_t start,
9266 vm_map_offset_t end)
9267 {
9268 kern_return_t kr;
9269 int i;
9270
9271 if ((vm_paging_base_address == 0) ||
9272 (start < vm_paging_base_address) ||
9273 (end > (vm_paging_base_address
9274 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9275 /*
9276 * We didn't use our pre-allocated pool of
9277 * kernel virtual addresses. Deallocate the
9278 * virtual memory.
9279 */
9280 if (object != VM_OBJECT_NULL) {
9281 vm_object_unlock(object);
9282 }
9283 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9284 if (object != VM_OBJECT_NULL) {
9285 vm_object_lock(object);
9286 }
9287 assert(kr == KERN_SUCCESS);
9288 } else {
9289 /*
9290 * We used a kernel virtual address from our
9291 * pre-allocated pool. Put it back in the pool
9292 * for next time.
9293 */
9294 assert(end - start == PAGE_SIZE);
9295 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9296 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9297
9298 /* undo the pmap mapping */
9299 pmap_remove(kernel_pmap, start, end);
9300
9301 simple_lock(&vm_paging_lock);
9302 vm_paging_page_inuse[i] = FALSE;
9303 if (vm_paging_page_waiter) {
9304 thread_wakeup(&vm_paging_page_waiter);
9305 }
9306 simple_unlock(&vm_paging_lock);
9307 }
9308 }
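
/*
 * Illustrative only -- a minimal sketch of how a caller is expected to
 * pair vm_paging_map_object() with vm_paging_unmap_object(), following
 * the locking rules documented above (the object stays locked across
 * PMAP_ENTER and across the kernel's access to the page).  The "page",
 * "object" and size/address locals are hypothetical; vm_page_slide()
 * below is a real in-tree caller.
 *
 *	vm_map_size_t	size = PAGE_SIZE;
 *	vm_map_offset_t	kaddr = 0;
 *	boolean_t	needs_unmap = FALSE;
 *	kern_return_t	kr;
 *
 *	vm_object_lock(object);
 *	kr = vm_paging_map_object(page, object, page->offset,
 *				  VM_PROT_READ | VM_PROT_WRITE,
 *				  FALSE,	// not allowed to unlock "object"
 *				  &size, &kaddr, &needs_unmap);
 *	if (kr == KERN_SUCCESS) {
 *		// ... access the page through "kaddr", object still locked ...
 *		if (needs_unmap)
 *			vm_paging_unmap_object(object, kaddr, kaddr + size);
 *	}
 *	vm_object_unlock(object);
 */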
9309
9310
9311 /*
9312 * page->object must be locked
9313 */
9314 void
9315 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9316 {
9317 if (!queues_locked) {
9318 vm_page_lockspin_queues();
9319 }
9320
9321 page->free_when_done = FALSE;
9322 /*
9323 * need to drop the laundry count...
9324 * we may also need to remove it
9325 * from the I/O paging queue...
9326 * vm_pageout_throttle_up handles both cases
9327 *
9328 * the laundry and pageout_queue flags are cleared...
9329 */
9330 vm_pageout_throttle_up(page);
9331
9332 vm_page_steal_pageout_page++;
9333
9334 if (!queues_locked) {
9335 vm_page_unlock_queues();
9336 }
9337 }
9338
9339 upl_t
9340 vector_upl_create(vm_offset_t upl_offset)
9341 {
9342 int vector_upl_size = sizeof(struct _vector_upl);
9343 int i=0;
9344 upl_t upl;
9345 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9346
9347 upl = upl_create(0,UPL_VECTOR,0);
9348 upl->vector_upl = vector_upl;
9349 upl->offset = upl_offset;
9350 vector_upl->size = 0;
9351 vector_upl->offset = upl_offset;
9352 vector_upl->invalid_upls=0;
9353 vector_upl->num_upls=0;
9354 vector_upl->pagelist = NULL;
9355
9356 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9357 vector_upl->upl_iostates[i].size = 0;
9358 vector_upl->upl_iostates[i].offset = 0;
9359
9360 }
9361 return upl;
9362 }
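
/*
 * Illustrative only -- a rough sketch of the vector-UPL life cycle
 * implemented by the routines in this block; the real user is the
 * cluster I/O code.  "vupl", "sub_upl[]", "offsets[]" and "io_sizes[]"
 * are hypothetical placeholders for UPLs created elsewhere.
 *
 *	upl_t	vupl = vector_upl_create(base_offset);
 *
 *	for (i = 0; i < n; i++) {
 *		// add each sub-UPL and record where its I/O lives
 *		vector_upl_set_subupl(vupl, sub_upl[i], io_sizes[i]);
 *		vector_upl_set_iostate(vupl, sub_upl[i],
 *				       offsets[i], io_sizes[i]);
 *	}
 *	vector_upl_set_pagelist(vupl);	// build the combined page list
 *
 *	// ... issue the I/O ...
 *
 *	// As each sub-UPL is committed or aborted it is detached by
 *	// passing io_size == 0; when that call returns TRUE the last
 *	// sub-UPL is gone and the vector UPL itself can be torn down.
 *	if (vector_upl_set_subupl(vupl, sub_upl[k], 0))
 *		vector_upl_deallocate(vupl);
 */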
9363
9364 void
9365 vector_upl_deallocate(upl_t upl)
9366 {
9367 if(upl) {
9368 vector_upl_t vector_upl = upl->vector_upl;
9369 if(vector_upl) {
9370 if(vector_upl->invalid_upls != vector_upl->num_upls)
9371 panic("Deallocating non-empty Vectored UPL\n");
9372 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9373 vector_upl->invalid_upls=0;
9374 vector_upl->num_upls = 0;
9375 vector_upl->pagelist = NULL;
9376 vector_upl->size = 0;
9377 vector_upl->offset = 0;
9378 kfree(vector_upl, sizeof(struct _vector_upl));
9379 vector_upl = (vector_upl_t)0xfeedfeed;
9380 }
9381 else
9382 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9383 }
9384 else
9385 panic("vector_upl_deallocate was passed a NULL upl\n");
9386 }
9387
9388 boolean_t
9389 vector_upl_is_valid(upl_t upl)
9390 {
9391 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9392 vector_upl_t vector_upl = upl->vector_upl;
9393 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9394 return FALSE;
9395 else
9396 return TRUE;
9397 }
9398 return FALSE;
9399 }
9400
9401 boolean_t
9402 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9403 {
9404 if(vector_upl_is_valid(upl)) {
9405 vector_upl_t vector_upl = upl->vector_upl;
9406
9407 if(vector_upl) {
9408 if(subupl) {
9409 if(io_size) {
9410 if(io_size < PAGE_SIZE)
9411 io_size = PAGE_SIZE;
9412 subupl->vector_upl = (void*)vector_upl;
9413 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9414 vector_upl->size += io_size;
9415 upl->size += io_size;
9416 }
9417 else {
9418 uint32_t i=0,invalid_upls=0;
9419 for(i = 0; i < vector_upl->num_upls; i++) {
9420 if(vector_upl->upl_elems[i] == subupl)
9421 break;
9422 }
9423 if(i == vector_upl->num_upls)
9424 panic("Trying to remove sub-upl when none exists");
9425
9426 vector_upl->upl_elems[i] = NULL;
9427 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9428 if(invalid_upls == vector_upl->num_upls)
9429 return TRUE;
9430 else
9431 return FALSE;
9432 }
9433 }
9434 else
9435 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9436 }
9437 else
9438 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9439 }
9440 else
9441 panic("vector_upl_set_subupl was passed a NULL upl\n");
9442
9443 return FALSE;
9444 }
9445
9446 void
9447 vector_upl_set_pagelist(upl_t upl)
9448 {
9449 if(vector_upl_is_valid(upl)) {
9450 uint32_t i=0;
9451 vector_upl_t vector_upl = upl->vector_upl;
9452
9453 if(vector_upl) {
9454 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9455
9456 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9457
9458 for(i=0; i < vector_upl->num_upls; i++) {
9459 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9460 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9461 pagelist_size += cur_upl_pagelist_size;
9462 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9463 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9464 }
9465 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9466 }
9467 else
9468 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9469 }
9470 else
9471 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9472
9473 }
9474
9475 upl_t
9476 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9477 {
9478 if(vector_upl_is_valid(upl)) {
9479 vector_upl_t vector_upl = upl->vector_upl;
9480 if(vector_upl) {
9481 if(index < vector_upl->num_upls)
9482 return vector_upl->upl_elems[index];
9483 }
9484 else
9485 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9486 }
9487 return NULL;
9488 }
9489
9490 upl_t
9491 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9492 {
9493 if(vector_upl_is_valid(upl)) {
9494 uint32_t i=0;
9495 vector_upl_t vector_upl = upl->vector_upl;
9496
9497 if(vector_upl) {
9498 upl_t subupl = NULL;
9499 vector_upl_iostates_t subupl_state;
9500
9501 for(i=0; i < vector_upl->num_upls; i++) {
9502 subupl = vector_upl->upl_elems[i];
9503 subupl_state = vector_upl->upl_iostates[i];
9504 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9505 /* We could have been passed an offset/size pair that belongs
9506 * to a UPL element that has already been committed/aborted.
9507 * If so, return NULL.
9508 */
9509 if(subupl == NULL)
9510 return NULL;
9511 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9512 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9513 if(*upl_size > subupl_state.size)
9514 *upl_size = subupl_state.size;
9515 }
9516 if(*upl_offset >= subupl_state.offset)
9517 *upl_offset -= subupl_state.offset;
9518 else if(i)
9519 panic("Vector UPL offset miscalculation\n");
9520 return subupl;
9521 }
9522 }
9523 }
9524 else
9525 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9526 }
9527 return NULL;
9528 }
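
/*
 * Worked example for vector_upl_subupl_byoffset() above (numbers are
 * hypothetical): with two sub-UPLs whose iostates are
 * {offset 0, size 0x10000} and {offset 0x10000, size 0x10000}, a lookup
 * with *upl_offset = 0x14000 and *upl_size = 0x10000 skips sub-UPL 0,
 * matches sub-UPL 1, clips *upl_size to 0xC000 so the request does not
 * run past that iostate, and rebases *upl_offset to 0x4000, i.e. the
 * offset relative to the returned sub-UPL.
 */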
9529
9530 void
9531 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9532 {
9533 *v_upl_submap = NULL;
9534
9535 if(vector_upl_is_valid(upl)) {
9536 vector_upl_t vector_upl = upl->vector_upl;
9537 if(vector_upl) {
9538 *v_upl_submap = vector_upl->submap;
9539 *submap_dst_addr = vector_upl->submap_dst_addr;
9540 }
9541 else
9542 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9543 }
9544 else
9545 panic("vector_upl_get_submap was passed a null UPL\n");
9546 }
9547
9548 void
9549 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9550 {
9551 if(vector_upl_is_valid(upl)) {
9552 vector_upl_t vector_upl = upl->vector_upl;
9553 if(vector_upl) {
9554 vector_upl->submap = submap;
9555 vector_upl->submap_dst_addr = submap_dst_addr;
9556 }
9557 else
9558 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9559 }
9560 else
9561 panic("vector_upl_get_submap was passed a NULL UPL\n");
9562 }
9563
9564 void
9565 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9566 {
9567 if(vector_upl_is_valid(upl)) {
9568 uint32_t i = 0;
9569 vector_upl_t vector_upl = upl->vector_upl;
9570
9571 if(vector_upl) {
9572 for(i = 0; i < vector_upl->num_upls; i++) {
9573 if(vector_upl->upl_elems[i] == subupl)
9574 break;
9575 }
9576
9577 if(i == vector_upl->num_upls)
9578 panic("setting sub-upl iostate when none exists");
9579
9580 vector_upl->upl_iostates[i].offset = offset;
9581 if(size < PAGE_SIZE)
9582 size = PAGE_SIZE;
9583 vector_upl->upl_iostates[i].size = size;
9584 }
9585 else
9586 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9587 }
9588 else
9589 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9590 }
9591
9592 void
9593 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9594 {
9595 if(vector_upl_is_valid(upl)) {
9596 uint32_t i = 0;
9597 vector_upl_t vector_upl = upl->vector_upl;
9598
9599 if(vector_upl) {
9600 for(i = 0; i < vector_upl->num_upls; i++) {
9601 if(vector_upl->upl_elems[i] == subupl)
9602 break;
9603 }
9604
9605 if(i == vector_upl->num_upls)
9606 panic("getting sub-upl iostate when none exists");
9607
9608 *offset = vector_upl->upl_iostates[i].offset;
9609 *size = vector_upl->upl_iostates[i].size;
9610 }
9611 else
9612 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9613 }
9614 else
9615 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9616 }
9617
9618 void
9619 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9620 {
9621 if(vector_upl_is_valid(upl)) {
9622 vector_upl_t vector_upl = upl->vector_upl;
9623 if(vector_upl) {
9624 if(index < vector_upl->num_upls) {
9625 *offset = vector_upl->upl_iostates[index].offset;
9626 *size = vector_upl->upl_iostates[index].size;
9627 }
9628 else
9629 *offset = *size = 0;
9630 }
9631 else
9632 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9633 }
9634 else
9635 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9636 }
9637
9638 upl_page_info_t *
9639 upl_get_internal_vectorupl_pagelist(upl_t upl)
9640 {
9641 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9642 }
9643
9644 void *
9645 upl_get_internal_vectorupl(upl_t upl)
9646 {
9647 return upl->vector_upl;
9648 }
9649
9650 vm_size_t
9651 upl_get_internal_pagelist_offset(void)
9652 {
9653 return sizeof(struct upl);
9654 }
9655
9656 void
9657 upl_clear_dirty(
9658 upl_t upl,
9659 boolean_t value)
9660 {
9661 if (value) {
9662 upl->flags |= UPL_CLEAR_DIRTY;
9663 } else {
9664 upl->flags &= ~UPL_CLEAR_DIRTY;
9665 }
9666 }
9667
9668 void
9669 upl_set_referenced(
9670 upl_t upl,
9671 boolean_t value)
9672 {
9673 upl_lock(upl);
9674 if (value) {
9675 upl->ext_ref_count++;
9676 } else {
9677 if (!upl->ext_ref_count) {
9678 panic("upl_set_referenced not %p\n", upl);
9679 }
9680 upl->ext_ref_count--;
9681 }
9682 upl_unlock(upl);
9683 }
9684
9685 #if CONFIG_IOSCHED
9686 void
9687 upl_set_blkno(
9688 upl_t upl,
9689 vm_offset_t upl_offset,
9690 int io_size,
9691 int64_t blkno)
9692 {
9693 int i,j;
9694 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9695 return;
9696
9697 assert(upl->upl_reprio_info != 0);
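	/*
	 * "i" indexes the UPL page backing byte offset upl_offset + j, so
	 * every page covered by the io_size byte range gets stamped with
	 * the same (blkno, io_size) reprioritization record.
	 */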
9698 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9699 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9700 }
9701 }
9702 #endif
9703
9704 boolean_t
9705 vm_page_is_slideable(vm_page_t m)
9706 {
9707 boolean_t result = FALSE;
9708 vm_shared_region_slide_info_t si;
9709 vm_object_t m_object;
9710
9711 m_object = VM_PAGE_OBJECT(m);
9712
9713 vm_object_lock_assert_held(m_object);
9714
9715 /* make sure our page belongs to the one object allowed to do this */
9716 if (!m_object->object_slid) {
9717 goto done;
9718 }
9719
9720 si = m_object->vo_slide_info;
9721 if (si == NULL) {
9722 goto done;
9723 }
9724
9725 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
9726 result = TRUE;
9727 }
9728
9729 done:
9730 return result;
9731 }
9732
9733 int vm_page_slide_counter = 0;
9734 int vm_page_slide_errors = 0;
9735 kern_return_t
9736 vm_page_slide(
9737 vm_page_t page,
9738 vm_map_offset_t kernel_mapping_offset)
9739 {
9740 kern_return_t kr;
9741 vm_map_size_t kernel_mapping_size;
9742 boolean_t kernel_mapping_needs_unmap;
9743 vm_offset_t kernel_vaddr;
9744 uint32_t pageIndex;
9745 uint32_t slide_chunk;
9746 vm_object_t page_object;
9747
9748 page_object = VM_PAGE_OBJECT(page);
9749
9750 assert(!page->slid);
9751 assert(page_object->object_slid);
9752 vm_object_lock_assert_exclusive(page_object);
9753
9754 if (page->error)
9755 return KERN_FAILURE;
9756
9757 /*
9758 * Take a paging-in-progress reference to keep the object
9759 * alive even if we have to unlock it (in vm_paging_map_object()
9760 * for example)...
9761 */
9762 vm_object_paging_begin(page_object);
9763
9764 if (kernel_mapping_offset == 0) {
9765 /*
9766 * The page hasn't already been mapped in kernel space
9767 * by the caller. Map it now, so that we can access
9768 * its contents and slide them.
9769 */
9770 kernel_mapping_size = PAGE_SIZE;
9771 kernel_mapping_needs_unmap = FALSE;
9772 kr = vm_paging_map_object(page,
9773 page_object,
9774 page->offset,
9775 VM_PROT_READ | VM_PROT_WRITE,
9776 FALSE,
9777 &kernel_mapping_size,
9778 &kernel_mapping_offset,
9779 &kernel_mapping_needs_unmap);
9780 if (kr != KERN_SUCCESS) {
9781 panic("vm_page_slide: "
9782 "could not map page in kernel: 0x%x\n",
9783 kr);
9784 }
9785 } else {
9786 kernel_mapping_size = 0;
9787 kernel_mapping_needs_unmap = FALSE;
9788 }
9789 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9790
9791 /*
9792 * Slide the pointers on the page.
9793 */
9794
9795 /* assert that slide_file_info.start/end are page-aligned? */
9796
9797 assert(!page->slid);
9798 assert(page_object->object_slid);
9799
9800 pageIndex = (uint32_t)((page->offset -
9801 page_object->vo_slide_info->start) /
9802 PAGE_SIZE_FOR_SR_SLIDE);
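	/*
	 * Worked example (assuming PAGE_SIZE_FOR_SR_SLIDE is 4KB and
	 * PAGE_SIZE is 16KB -- both values are assumptions made only for
	 * illustration): a page at slide_info->start + 0x8000 gets
	 * pageIndex = 8, and the loop below slides the four 4KB chunks
	 * covering it, passing sub-page indices 8..11 and kernel addresses
	 * kernel_vaddr + 0x0/0x1000/0x2000/0x3000 to
	 * vm_shared_region_slide_page().
	 */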
9803 for (slide_chunk = 0;
9804 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
9805 slide_chunk++) {
9806 kr = vm_shared_region_slide_page(page_object->vo_slide_info,
9807 (kernel_vaddr +
9808 (slide_chunk *
9809 PAGE_SIZE_FOR_SR_SLIDE)),
9810 (pageIndex + slide_chunk));
9811 if (kr != KERN_SUCCESS) {
9812 break;
9813 }
9814 }
9815
9816 vm_page_slide_counter++;
9817
9818 /*
9819 * Unmap the page from the kernel's address space.
9820 */
9821 if (kernel_mapping_needs_unmap) {
9822 vm_paging_unmap_object(page_object,
9823 kernel_vaddr,
9824 kernel_vaddr + PAGE_SIZE);
9825 }
9826
9827 page->dirty = FALSE;
9828 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9829
9830 if (kr != KERN_SUCCESS || cs_debug > 1) {
9831 printf("vm_page_slide(%p): "
9832 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
9833 page,
9834 page_object, page->offset,
9835 page_object->pager,
9836 page->offset + page_object->paging_offset);
9837 }
9838
9839 if (kr == KERN_SUCCESS) {
9840 page->slid = TRUE;
9841 } else {
9842 page->error = TRUE;
9843 vm_page_slide_errors++;
9844 }
9845
9846 vm_object_paging_end(page_object);
9847
9848 return kr;
9849 }
9850
9851 inline void memoryshot(unsigned int event, unsigned int control)
9852 {
9853 if (vm_debug_events) {
9854 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9855 vm_page_active_count, vm_page_inactive_count,
9856 vm_page_free_count, vm_page_speculative_count,
9857 vm_page_throttled_count);
9858 } else {
9859 (void) event;
9860 (void) control;
9861 }
9862
9863 }
9864
9865 #ifdef MACH_BSD
9866
9867 boolean_t upl_device_page(upl_page_info_t *upl)
9868 {
9869 return(UPL_DEVICE_PAGE(upl));
9870 }
9871 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9872 {
9873 return(UPL_PAGE_PRESENT(upl, index));
9874 }
9875 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9876 {
9877 return(UPL_SPECULATIVE_PAGE(upl, index));
9878 }
9879 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9880 {
9881 return(UPL_DIRTY_PAGE(upl, index));
9882 }
9883 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9884 {
9885 return(UPL_VALID_PAGE(upl, index));
9886 }
9887 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9888 {
9889 return(UPL_PHYS_PAGE(upl, index));
9890 }
9891
9892 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9893 {
9894 upl[index].mark = v;
9895 }
9896
9897 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9898 {
9899 return upl[index].mark;
9900 }
9901
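/*
 * Debug helper: walks the inactive, throttled and anonymous queues and
 * then the active queue, counting dirty, free_when_done (pageout) and
 * precious pages, and prints the "dirty : pageout : precious" triples
 * ("IN Q" for the inactive-type queues, "AC Q" for the active queue).
 */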
9902 void
9903 vm_countdirtypages(void)
9904 {
9905 vm_page_t m;
9906 int dpages;
9907 int pgopages;
9908 int precpages;
9909
9910
9911 dpages=0;
9912 pgopages=0;
9913 precpages=0;
9914
9915 vm_page_lock_queues();
9916 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9917 do {
9918 if (m ==(vm_page_t )0) break;
9919
9920 if(m->dirty) dpages++;
9921 if(m->free_when_done) pgopages++;
9922 if(m->precious) precpages++;
9923
9924 assert(VM_PAGE_OBJECT(m) != kernel_object);
9925 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9926 if (m ==(vm_page_t )0) break;
9927
9928 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9929 vm_page_unlock_queues();
9930
9931 vm_page_lock_queues();
9932 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9933 do {
9934 if (m ==(vm_page_t )0) break;
9935
9936 dpages++;
9937 assert(m->dirty);
9938 assert(!m->free_when_done);
9939 assert(VM_PAGE_OBJECT(m) != kernel_object);
9940 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9941 if (m ==(vm_page_t )0) break;
9942
9943 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9944 vm_page_unlock_queues();
9945
9946 vm_page_lock_queues();
9947 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9948 do {
9949 if (m ==(vm_page_t )0) break;
9950
9951 if(m->dirty) dpages++;
9952 if(m->free_when_done) pgopages++;
9953 if(m->precious) precpages++;
9954
9955 assert(VM_PAGE_OBJECT(m) != kernel_object);
9956 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9957 if (m ==(vm_page_t )0) break;
9958
9959 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9960 vm_page_unlock_queues();
9961
9962 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9963
9964 dpages=0;
9965 pgopages=0;
9966 precpages=0;
9967
9968 vm_page_lock_queues();
9969 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9970
9971 do {
9972 if(m == (vm_page_t )0) break;
9973 if(m->dirty) dpages++;
9974 if(m->free_when_done) pgopages++;
9975 if(m->precious) precpages++;
9976
9977 assert(VM_PAGE_OBJECT(m) != kernel_object);
9978 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9979 if(m == (vm_page_t )0) break;
9980
9981 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9982 vm_page_unlock_queues();
9983
9984 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9985
9986 }
9987 #endif /* MACH_BSD */
9988
9989
9990 #if CONFIG_IOSCHED
9991 int upl_get_cached_tier(upl_t upl)
9992 {
9993 assert(upl);
9994 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9995 return (upl->upl_priority);
9996 return (-1);
9997 }
9998 #endif /* CONFIG_IOSCHED */
9999
10000 ppnum_t upl_get_highest_page(
10001 upl_t upl)
10002 {
10003 return upl->highest_page;
10004 }
10005
10006 upl_size_t upl_get_size(
10007 upl_t upl)
10008 {
10009 return upl->size;
10010 }
10011
10012 upl_t upl_associated_upl(upl_t upl)
10013 {
10014 return upl->associated_upl;
10015 }
10016
10017 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
10018 {
10019 upl->associated_upl = associated_upl;
10020 }
10021
10022 struct vnode * upl_lookup_vnode(upl_t upl)
10023 {
10024 if (!upl->map_object->internal)
10025 return vnode_pager_lookup_vnode(upl->map_object->pager);
10026 else
10027 return NULL;
10028 }
10029
10030 #if UPL_DEBUG
10031 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
10032 {
10033 upl->ubc_alias1 = alias1;
10034 upl->ubc_alias2 = alias2;
10035 return KERN_SUCCESS;
10036 }
10037 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
10038 {
10039 if(al)
10040 *al = upl->ubc_alias1;
10041 if(al2)
10042 *al2 = upl->ubc_alias2;
10043 return KERN_SUCCESS;
10044 }
10045 #endif /* UPL_DEBUG */
10046
10047 #if VM_PRESSURE_EVENTS
10048 /*
10049 * Upward trajectory.
10050 */
10051 extern boolean_t vm_compressor_low_on_space(void);
10052
10053 boolean_t
10054 VM_PRESSURE_NORMAL_TO_WARNING(void) {
10055
10056 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10057
10058 /* Available pages below our threshold */
10059 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10060 /* No frozen processes to kill */
10061 if (memorystatus_frozen_count == 0) {
10062 /* Not enough suspended processes available. */
10063 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10064 return TRUE;
10065 }
10066 }
10067 }
10068 return FALSE;
10069
10070 } else {
10071 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
10072 }
10073 }
10074
10075 boolean_t
10076 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
10077
10078 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10079
10080 /* Available pages below our threshold */
10081 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10082 return TRUE;
10083 }
10084 return FALSE;
10085 } else {
10086 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10087 }
10088 }
10089
10090 /*
10091 * Downward trajectory.
10092 */
10093 boolean_t
10094 VM_PRESSURE_WARNING_TO_NORMAL(void) {
10095
10096 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10097
10098 /* Available pages above our threshold */
10099 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
10100 if (memorystatus_available_pages > target_threshold) {
10101 return TRUE;
10102 }
10103 return FALSE;
10104 } else {
10105 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
10106 }
10107 }
10108
10109 boolean_t
10110 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
10111
10112 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10113
10114 /* Available pages above our threshold */
10115 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
10116 if (memorystatus_available_pages > target_threshold) {
10117 return TRUE;
10118 }
10119 return FALSE;
10120 } else {
10121 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10122 }
10123 }
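
/*
 * Worked example of the non-compressor hysteresis above (the page count
 * is hypothetical): with memorystatus_available_pages_pressure = 4000,
 * NORMAL_TO_WARNING can fire once available pages drop below 4000 (and
 * there are no frozen processes and too few suspended ones to reclaim),
 * but WARNING_TO_NORMAL requires climbing back above 4000 + 15% = 4600
 * pages, so the system does not flap between states around a single
 * threshold.  The critical threshold pair behaves the same way with
 * memorystatus_available_pages_critical.
 */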
10124 #endif /* VM_PRESSURE_EVENTS */
10125
10126
10127
10128 #define VM_TEST_COLLAPSE_COMPRESSOR 0
10129 #define VM_TEST_WIRE_AND_EXTRACT 0
10130 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
10131 #if __arm64__
10132 #define VM_TEST_KERNEL_OBJECT_FAULT 0
10133 #endif /* __arm64__ */
10134 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
10135
10136 #if VM_TEST_COLLAPSE_COMPRESSOR
10137 extern boolean_t vm_object_collapse_compressor_allowed;
10138 #include <IOKit/IOLib.h>
10139 static void
10140 vm_test_collapse_compressor(void)
10141 {
10142 vm_object_size_t backing_size, top_size;
10143 vm_object_t backing_object, top_object;
10144 vm_map_offset_t backing_offset, top_offset;
10145 unsigned char *backing_address, *top_address;
10146 kern_return_t kr;
10147
10148 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
10149
10150 /* create backing object */
10151 backing_size = 15 * PAGE_SIZE;
10152 backing_object = vm_object_allocate(backing_size);
10153 assert(backing_object != VM_OBJECT_NULL);
10154 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
10155 backing_object);
10156 /* map backing object */
10157 backing_offset = 0;
10158 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
10159 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10160 backing_object, 0, FALSE,
10161 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10162 assert(kr == KERN_SUCCESS);
10163 backing_address = (unsigned char *) backing_offset;
10164 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10165 "mapped backing object %p at 0x%llx\n",
10166 backing_object, (uint64_t) backing_offset);
10167 /* populate with pages to be compressed in backing object */
10168 backing_address[0x1*PAGE_SIZE] = 0xB1;
10169 backing_address[0x4*PAGE_SIZE] = 0xB4;
10170 backing_address[0x7*PAGE_SIZE] = 0xB7;
10171 backing_address[0xa*PAGE_SIZE] = 0xBA;
10172 backing_address[0xd*PAGE_SIZE] = 0xBD;
10173 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10174 "populated pages to be compressed in "
10175 "backing_object %p\n", backing_object);
10176 /* compress backing object */
10177 vm_object_pageout(backing_object);
10178 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
10179 backing_object);
10180 /* wait for all the pages to be gone */
10181 while (*(volatile int *)&backing_object->resident_page_count != 0)
10182 IODelay(10);
10183 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
10184 backing_object);
10185 /* populate with pages to be resident in backing object */
10186 backing_address[0x0*PAGE_SIZE] = 0xB0;
10187 backing_address[0x3*PAGE_SIZE] = 0xB3;
10188 backing_address[0x6*PAGE_SIZE] = 0xB6;
10189 backing_address[0x9*PAGE_SIZE] = 0xB9;
10190 backing_address[0xc*PAGE_SIZE] = 0xBC;
10191 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10192 "populated pages to be resident in "
10193 "backing_object %p\n", backing_object);
10194 /* leave the other pages absent */
10195 /* mess with the paging_offset of the backing_object */
10196 assert(backing_object->paging_offset == 0);
10197 backing_object->paging_offset = 0x3000;
10198
10199 /* create top object */
10200 top_size = 9 * PAGE_SIZE;
10201 top_object = vm_object_allocate(top_size);
10202 assert(top_object != VM_OBJECT_NULL);
10203 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
10204 top_object);
10205 /* map top object */
10206 top_offset = 0;
10207 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
10208 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10209 top_object, 0, FALSE,
10210 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10211 assert(kr == KERN_SUCCESS);
10212 top_address = (unsigned char *) top_offset;
10213 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10214 "mapped top object %p at 0x%llx\n",
10215 top_object, (uint64_t) top_offset);
10216 /* populate with pages to be compressed in top object */
10217 top_address[0x3*PAGE_SIZE] = 0xA3;
10218 top_address[0x4*PAGE_SIZE] = 0xA4;
10219 top_address[0x5*PAGE_SIZE] = 0xA5;
10220 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10221 "populated pages to be compressed in "
10222 "top_object %p\n", top_object);
10223 /* compress top object */
10224 vm_object_pageout(top_object);
10225 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
10226 top_object);
10227 /* wait for all the pages to be gone */
10228 while (top_object->resident_page_count != 0)
10229 IODelay(10);
10230 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
10231 top_object);
10232 /* populate with pages to be resident in top object */
10233 top_address[0x0*PAGE_SIZE] = 0xA0;
10234 top_address[0x1*PAGE_SIZE] = 0xA1;
10235 top_address[0x2*PAGE_SIZE] = 0xA2;
10236 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10237 "populated pages to be resident in "
10238 "top_object %p\n", top_object);
10239 /* leave the other pages absent */
10240
10241 /* link the 2 objects */
10242 vm_object_reference(backing_object);
10243 top_object->shadow = backing_object;
10244 top_object->vo_shadow_offset = 0x3000;
10245 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
10246 top_object, backing_object);
10247
10248 /* unmap backing object */
10249 vm_map_remove(kernel_map,
10250 backing_offset,
10251 backing_offset + backing_size,
10252 0);
10253 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10254 "unmapped backing_object %p [0x%llx:0x%llx]\n",
10255 backing_object,
10256 (uint64_t) backing_offset,
10257 (uint64_t) (backing_offset + backing_size));
10258
10259 /* collapse */
10260 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
10261 vm_object_lock(top_object);
10262 vm_object_collapse(top_object, 0, FALSE);
10263 vm_object_unlock(top_object);
10264 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
10265
10266 /* did it work? */
10267 if (top_object->shadow != VM_OBJECT_NULL) {
10268 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10269 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10270 if (vm_object_collapse_compressor_allowed) {
10271 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10272 }
10273 } else {
10274 /* check the contents of the mapping */
10275 unsigned char expect[9] =
10276 { 0xA0, 0xA1, 0xA2, /* resident in top */
10277 0xA3, 0xA4, 0xA5, /* compressed in top */
10278 0xB9, /* resident in backing + shadow_offset */
10279 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10280 0x00 }; /* absent in both */
10281 unsigned char actual[9];
10282 unsigned int i, errors;
10283
10284 errors = 0;
10285 for (i = 0; i < sizeof (actual); i++) {
10286 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10287 if (actual[i] != expect[i]) {
10288 errors++;
10289 }
10290 }
10291 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10292 "actual [%x %x %x %x %x %x %x %x %x] "
10293 "expect [%x %x %x %x %x %x %x %x %x] "
10294 "%d errors\n",
10295 actual[0], actual[1], actual[2], actual[3],
10296 actual[4], actual[5], actual[6], actual[7],
10297 actual[8],
10298 expect[0], expect[1], expect[2], expect[3],
10299 expect[4], expect[5], expect[6], expect[7],
10300 expect[8],
10301 errors);
10302 if (errors) {
10303 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10304 } else {
10305 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10306 }
10307 }
10308 }
10309 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10310 #define vm_test_collapse_compressor()
10311 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10312
10313 #if VM_TEST_WIRE_AND_EXTRACT
10314 extern ledger_template_t task_ledger_template;
10315 #include <mach/mach_vm.h>
10316 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10317 vm_offset_t offset);
10318 static void
10319 vm_test_wire_and_extract(void)
10320 {
10321 ledger_t ledger;
10322 vm_map_t user_map, wire_map;
10323 mach_vm_address_t user_addr, wire_addr;
10324 mach_vm_size_t user_size, wire_size;
10325 mach_vm_offset_t cur_offset;
10326 vm_prot_t cur_prot, max_prot;
10327 ppnum_t user_ppnum, wire_ppnum;
10328 kern_return_t kr;
10329
10330 ledger = ledger_instantiate(task_ledger_template,
10331 LEDGER_CREATE_ACTIVE_ENTRIES);
10332 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10333 0x100000000ULL,
10334 0x200000000ULL,
10335 TRUE);
10336 wire_map = vm_map_create(NULL,
10337 0x100000000ULL,
10338 0x200000000ULL,
10339 TRUE);
10340 user_addr = 0;
10341 user_size = 0x10000;
10342 kr = mach_vm_allocate(user_map,
10343 &user_addr,
10344 user_size,
10345 VM_FLAGS_ANYWHERE);
10346 assert(kr == KERN_SUCCESS);
10347 wire_addr = 0;
10348 wire_size = user_size;
10349 kr = mach_vm_remap(wire_map,
10350 &wire_addr,
10351 wire_size,
10352 0,
10353 VM_FLAGS_ANYWHERE,
10354 user_map,
10355 user_addr,
10356 FALSE,
10357 &cur_prot,
10358 &max_prot,
10359 VM_INHERIT_NONE);
10360 assert(kr == KERN_SUCCESS);
10361 for (cur_offset = 0;
10362 cur_offset < wire_size;
10363 cur_offset += PAGE_SIZE) {
10364 kr = vm_map_wire_and_extract(wire_map,
10365 wire_addr + cur_offset,
10366 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10367 TRUE,
10368 &wire_ppnum);
10369 assert(kr == KERN_SUCCESS);
10370 user_ppnum = vm_map_get_phys_page(user_map,
10371 user_addr + cur_offset);
10372 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10373 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10374 kr,
10375 user_map, user_addr + cur_offset, user_ppnum,
10376 wire_map, wire_addr + cur_offset, wire_ppnum);
10377 if (kr != KERN_SUCCESS ||
10378 wire_ppnum == 0 ||
10379 wire_ppnum != user_ppnum) {
10380 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10381 }
10382 }
10383 cur_offset -= PAGE_SIZE;
10384 kr = vm_map_wire_and_extract(wire_map,
10385 wire_addr + cur_offset,
10386 VM_PROT_DEFAULT,
10387 TRUE,
10388 &wire_ppnum);
10389 assert(kr == KERN_SUCCESS);
10390 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10391 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10392 kr,
10393 user_map, user_addr + cur_offset, user_ppnum,
10394 wire_map, wire_addr + cur_offset, wire_ppnum);
10395 if (kr != KERN_SUCCESS ||
10396 wire_ppnum == 0 ||
10397 wire_ppnum != user_ppnum) {
10398 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10399 }
10400
10401 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10402 }
10403 #else /* VM_TEST_WIRE_AND_EXTRACT */
10404 #define vm_test_wire_and_extract()
10405 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10406
10407 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10408 static void
10409 vm_test_page_wire_overflow_panic(void)
10410 {
10411 vm_object_t object;
10412 vm_page_t page;
10413
10414 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10415
10416 object = vm_object_allocate(PAGE_SIZE);
10417 vm_object_lock(object);
10418 page = vm_page_alloc(object, 0x0);
10419 vm_page_lock_queues();
10420 do {
10421 vm_page_wire(page, 1, FALSE);
10422 } while (page->wire_count != 0);
10423 vm_page_unlock_queues();
10424 vm_object_unlock(object);
10425 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10426 object, page);
10427 }
10428 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10429 #define vm_test_page_wire_overflow_panic()
10430 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10431
10432 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10433 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10434 static void
10435 vm_test_kernel_object_fault(void)
10436 {
10437 kern_return_t kr;
10438 vm_offset_t stack;
10439 uintptr_t frameb[2];
10440 int ret;
10441
10442 kr = kernel_memory_allocate(kernel_map, &stack,
10443 kernel_stack_size + (2*PAGE_SIZE),
10444 0,
10445 (KMA_KSTACK | KMA_KOBJECT |
10446 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10447 VM_KERN_MEMORY_STACK);
10448 if (kr != KERN_SUCCESS) {
10449 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10450 }
10451 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10452 if (ret != 0) {
10453 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10454 } else {
10455 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10456 }
10457 vm_map_remove(kernel_map,
10458 stack,
10459 stack + kernel_stack_size + (2*PAGE_SIZE),
10460 VM_MAP_REMOVE_KUNWIRE);
10461 stack = 0;
10462 }
10463 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10464 #define vm_test_kernel_object_fault()
10465 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10466
10467 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10468 static void
10469 vm_test_device_pager_transpose(void)
10470 {
10471 memory_object_t device_pager;
10472 vm_object_t anon_object, device_object;
10473 vm_size_t size;
10474 vm_map_offset_t anon_mapping, device_mapping;
10475 kern_return_t kr;
10476
10477 size = 3 * PAGE_SIZE;
10478 anon_object = vm_object_allocate(size);
10479 assert(anon_object != VM_OBJECT_NULL);
10480 device_pager = device_pager_setup(NULL, 0, size, 0);
10481 assert(device_pager != NULL);
10482 device_object = memory_object_to_vm_object(device_pager);
10483 assert(device_object != VM_OBJECT_NULL);
10484 anon_mapping = 0;
10485 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10486 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10487 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10488 VM_INHERIT_DEFAULT);
10489 assert(kr == KERN_SUCCESS);
10490 device_mapping = 0;
10491 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10492 VM_FLAGS_ANYWHERE,
10493 VM_MAP_KERNEL_FLAGS_NONE,
10494 VM_KERN_MEMORY_NONE,
10495 (void *)device_pager, 0, FALSE,
10496 VM_PROT_DEFAULT, VM_PROT_ALL,
10497 VM_INHERIT_DEFAULT);
10498 assert(kr == KERN_SUCCESS);
10499 memory_object_deallocate(device_pager);
10500
10501 vm_object_lock(anon_object);
10502 vm_object_activity_begin(anon_object);
10503 anon_object->blocked_access = TRUE;
10504 vm_object_unlock(anon_object);
10505 vm_object_lock(device_object);
10506 vm_object_activity_begin(device_object);
10507 device_object->blocked_access = TRUE;
10508 vm_object_unlock(device_object);
10509
10510 assert(anon_object->ref_count == 1);
10511 assert(!anon_object->named);
10512 assert(device_object->ref_count == 2);
10513 assert(device_object->named);
10514
10515 kr = vm_object_transpose(device_object, anon_object, size);
10516 assert(kr == KERN_SUCCESS);
10517
10518 vm_object_lock(anon_object);
10519 vm_object_activity_end(anon_object);
10520 anon_object->blocked_access = FALSE;
10521 vm_object_unlock(anon_object);
10522 vm_object_lock(device_object);
10523 vm_object_activity_end(device_object);
10524 device_object->blocked_access = FALSE;
10525 vm_object_unlock(device_object);
10526
10527 assert(anon_object->ref_count == 2);
10528 assert(anon_object->named);
10529 kr = vm_deallocate(kernel_map, anon_mapping, size);
10530 assert(kr == KERN_SUCCESS);
10531 assert(device_object->ref_count == 1);
10532 assert(!device_object->named);
10533 kr = vm_deallocate(kernel_map, device_mapping, size);
10534 assert(kr == KERN_SUCCESS);
10535
10536 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10537 }
10538 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10539 #define vm_test_device_pager_transpose()
10540 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10541
10542 void
10543 vm_tests(void)
10544 {
10545 vm_test_collapse_compressor();
10546 vm_test_wire_and_extract();
10547 vm_test_page_wire_overflow_panic();
10548 vm_test_kernel_object_fault();
10549 vm_test_device_pager_transpose();
10550 }