osfmk/vm/vm_resident.c (apple/xnu, xnu-6153.61.1)
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/ledger.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_init.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_pageout.h>
86 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
87 #include <kern/misc_protos.h>
88 #include <zone_debug.h>
89 #include <mach_debug/zone_info.h>
90 #include <vm/cpm.h>
91 #include <pexpert/pexpert.h>
92 #include <san/kasan.h>
93
94 #include <vm/vm_protos.h>
95 #include <vm/memory_object.h>
96 #include <vm/vm_purgeable_internal.h>
97 #include <vm/vm_compressor.h>
98 #if defined (__x86_64__)
99 #include <i386/misc_protos.h>
100 #endif
101
102 #if CONFIG_PHANTOM_CACHE
103 #include <vm/vm_phantom_cache.h>
104 #endif
105
106 #include <IOKit/IOHibernatePrivate.h>
107
108 #include <sys/kdebug.h>
109
110 #if defined(HAS_APPLE_PAC)
111 #include <ptrauth.h>
112 #endif
113
114 #if MACH_ASSERT
115
116 #define ASSERT_PMAP_FREE(mem) pmap_assert_free(VM_PAGE_GET_PHYS_PAGE(mem))
117
118 #else /* MACH_ASSERT */
119
120 #define ASSERT_PMAP_FREE(mem) /* nothing */
121
122 #endif /* MACH_ASSERT */
123
124 extern boolean_t vm_pageout_running;
125 extern thread_t vm_pageout_scan_thread;
126 extern boolean_t vps_dynamic_priority_enabled;
127
128 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
129 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
130 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
131 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
132
133 #if CONFIG_SECLUDED_MEMORY
134 struct vm_page_secluded_data vm_page_secluded;
135 void secluded_suppression_init(void);
136 #endif /* CONFIG_SECLUDED_MEMORY */
137
138 boolean_t hibernate_cleaning_in_progress = FALSE;
139 boolean_t vm_page_free_verify = TRUE;
140
141 uint32_t vm_lopage_free_count = 0;
142 uint32_t vm_lopage_free_limit = 0;
143 uint32_t vm_lopage_lowater = 0;
144 boolean_t vm_lopage_refill = FALSE;
145 boolean_t vm_lopage_needed = FALSE;
146
147 lck_mtx_ext_t vm_page_queue_lock_ext;
148 lck_mtx_ext_t vm_page_queue_free_lock_ext;
149 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
150
151 int speculative_age_index = 0;
152 int speculative_steal_index = 0;
153 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
154
155 boolean_t hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
156 * Updated and checked behind the vm_page_queues_lock. */
157
158 __private_extern__ void vm_page_init_lck_grp(void);
159
160 static void vm_page_free_prepare(vm_page_t page);
161 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
162
163 static void vm_tag_init(void);
164
165 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
166 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
167 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
168
169 /*
170 * Associated with each page of user-allocatable memory is a
171 * page structure.
172 */
173
174 /*
175 * These variables record the values returned by vm_page_bootstrap,
176 * for debugging purposes. The implementation of pmap_steal_memory
177 * and pmap_startup here also uses them internally.
178 */
179
180 vm_offset_t virtual_space_start;
181 vm_offset_t virtual_space_end;
182 uint32_t vm_page_pages;
183
184 /*
185 * The vm_page_lookup() routine, which provides for fast
186 * (virtual memory object, offset) to page lookup, employs
187 * the following hash table. The vm_page_{insert,remove}
188 * routines install and remove associations in the table.
189 * [This table is often called the virtual-to-physical,
190 * or VP, table.]
191 */
192 typedef struct {
193 vm_page_packed_t page_list;
194 #if MACH_PAGE_HASH_STATS
195 int cur_count; /* current count */
196 int hi_count; /* high water mark */
197 #endif /* MACH_PAGE_HASH_STATS */
198 } vm_page_bucket_t;
199
200
201 #define BUCKETS_PER_LOCK 16
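/*
 * Hash bucket locking is deliberately coarse: each spin lock in
 * vm_page_bucket_locks covers BUCKETS_PER_LOCK buckets rather than one
 * lock per bucket, so the lock array is 1/16th the size of the bucket
 * array (see the sizing of vm_page_bucket_lock_count in vm_page_bootstrap()).
 */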
202
203 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
204 unsigned int vm_page_bucket_count = 0; /* How big is array? */
205 unsigned int vm_page_hash_mask; /* Mask for hash function */
206 unsigned int vm_page_hash_shift; /* Shift for hash function */
207 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
208 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
209
210 #ifndef VM_TAG_ACTIVE_UPDATE
211 #error VM_TAG_ACTIVE_UPDATE
212 #endif
213 #ifndef VM_MAX_TAG_ZONES
214 #error VM_MAX_TAG_ZONES
215 #endif
216
217 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
218 lck_spin_t *vm_page_bucket_locks;
219 lck_spin_t vm_objects_wired_lock;
220 lck_spin_t vm_allocation_sites_lock;
221
222 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
223 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
224 #if VM_MAX_TAG_ZONES
225 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
226 #endif /* VM_MAX_TAG_ZONES */
227
228 vm_tag_t vm_allocation_tag_highest;
229
230 #if VM_PAGE_BUCKETS_CHECK
231 boolean_t vm_page_buckets_check_ready = FALSE;
232 #if VM_PAGE_FAKE_BUCKETS
233 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
234 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
235 #endif /* VM_PAGE_FAKE_BUCKETS */
236 #endif /* VM_PAGE_BUCKETS_CHECK */
237
238
239
240 #if MACH_PAGE_HASH_STATS
241 /* This routine is for debugging only. It is intended to be called by
242 * hand by a developer using a kernel debugger. This routine prints
243 * out vm_page_hash table statistics to the kernel debug console.
244 */
245 void
246 hash_debug(void)
247 {
248 int i;
249 int numbuckets = 0;
250 int highsum = 0;
251 int maxdepth = 0;
252
253 for (i = 0; i < vm_page_bucket_count; i++) {
254 if (vm_page_buckets[i].hi_count) {
255 numbuckets++;
256 highsum += vm_page_buckets[i].hi_count;
257 if (vm_page_buckets[i].hi_count > maxdepth) {
258 maxdepth = vm_page_buckets[i].hi_count;
259 }
260 }
261 }
262 printf("Total number of buckets: %d\n", vm_page_bucket_count);
263 printf("Number used buckets: %d = %d%%\n",
264 numbuckets, 100 * numbuckets / vm_page_bucket_count);
265 printf("Number unused buckets: %d = %d%%\n",
266 vm_page_bucket_count - numbuckets,
267 100 * (vm_page_bucket_count - numbuckets) / vm_page_bucket_count);
268 printf("Sum of bucket max depth: %d\n", highsum);
269 printf("Average bucket depth: %d.%2d\n",
270 highsum / vm_page_bucket_count,
271 highsum % vm_page_bucket_count);
272 printf("Maximum bucket depth: %d\n", maxdepth);
273 }
274 #endif /* MACH_PAGE_HASH_STATS */
275
276 /*
277 * The virtual page size is currently implemented as a runtime
278 * variable, but is constant once initialized using vm_set_page_size.
279 * This initialization must be done in the machine-dependent
280 * bootstrap sequence, before calling other machine-independent
281 * initializations.
282 *
283 * All references to the virtual page size outside this
284 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
285 * constants.
286 */
287 #if defined(__arm__) || defined(__arm64__)
288 vm_size_t page_size;
289 vm_size_t page_mask;
290 int page_shift;
291 #else
292 vm_size_t page_size = PAGE_SIZE;
293 vm_size_t page_mask = PAGE_MASK;
294 int page_shift = PAGE_SHIFT;
295 #endif
296
297 vm_page_t vm_pages = VM_PAGE_NULL;
298 vm_page_t vm_page_array_beginning_addr;
299 vm_page_t vm_page_array_ending_addr;
300
301 unsigned int vm_pages_count = 0;
302
303 /*
304 * Resident pages that represent real memory
305 * are allocated from a set of free lists,
306 * one per color.
307 */
308 unsigned int vm_colors;
309 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
310 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
311 unsigned int vm_free_magazine_refill_limit = 0;
312
313
314 struct vm_page_queue_free_head {
315 vm_page_queue_head_t qhead;
316 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
317
318 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
319
320
321 unsigned int vm_page_free_wanted;
322 unsigned int vm_page_free_wanted_privileged;
323 #if CONFIG_SECLUDED_MEMORY
324 unsigned int vm_page_free_wanted_secluded;
325 #endif /* CONFIG_SECLUDED_MEMORY */
326 unsigned int vm_page_free_count;
327
328 /*
329 * Occasionally, the virtual memory system uses
330 * resident page structures that do not refer to
331 * real pages, for example to leave a page with
332 * important state information in the VP table.
333 *
334 * These page structures are allocated the way
335 * most other kernel structures are.
336 */
337 zone_t vm_page_array_zone;
338 zone_t vm_page_zone;
339 vm_locks_array_t vm_page_locks;
340 decl_lck_mtx_data(, vm_page_alloc_lock);
341 lck_mtx_ext_t vm_page_alloc_lock_ext;
342
343 unsigned int vm_page_local_q_count = 0;
344 unsigned int vm_page_local_q_soft_limit = 250;
345 unsigned int vm_page_local_q_hard_limit = 500;
346 struct vplq *vm_page_local_q = NULL;
347
348 /* N.B. Guard and fictitious pages must not
349 * be assigned a zero phys_page value.
350 */
351 /*
352 * Fictitious pages don't have a physical address,
353 * but we must initialize phys_page to something.
354 * For debugging, this should be a strange value
355 * that the pmap module can recognize in assertions.
356 */
357 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
358
359 /*
360 * Guard pages are not accessible so they don't
361 * need a physical address, but we need to enter
362 * one in the pmap.
363 * Let's make it recognizable and make sure that
364 * we don't use a real physical page with that
365 * physical address.
366 */
367 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
368
369 /*
370 * Resident page structures are also chained on
371 * queues that are used by the page replacement
372 * system (pageout daemon). These queues are
373 * defined here, but are shared by the pageout
374 * module. The inactive queue is broken into
375 * file-backed and anonymous for convenience, as the
376 * pageout daemon often assigns a higher
377 * importance to anonymous pages (making them less likely to be picked).
378 */
379 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
380 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
381 #if CONFIG_SECLUDED_MEMORY
382 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
383 #endif /* CONFIG_SECLUDED_MEMORY */
384 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
385 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
386
387 queue_head_t vm_objects_wired;
388
389 void vm_update_darkwake_mode(boolean_t);
390
391 #if CONFIG_BACKGROUND_QUEUE
392 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
393 uint32_t vm_page_background_target;
394 uint32_t vm_page_background_target_snapshot;
395 uint32_t vm_page_background_count;
396 uint64_t vm_page_background_promoted_count;
397
398 uint32_t vm_page_background_internal_count;
399 uint32_t vm_page_background_external_count;
400
401 uint32_t vm_page_background_mode;
402 uint32_t vm_page_background_exclude_external;
403 #endif
404
405 unsigned int vm_page_active_count;
406 unsigned int vm_page_inactive_count;
407 #if CONFIG_SECLUDED_MEMORY
408 unsigned int vm_page_secluded_count;
409 unsigned int vm_page_secluded_count_free;
410 unsigned int vm_page_secluded_count_inuse;
411 unsigned int vm_page_secluded_count_over_target;
412 #endif /* CONFIG_SECLUDED_MEMORY */
413 unsigned int vm_page_anonymous_count;
414 unsigned int vm_page_throttled_count;
415 unsigned int vm_page_speculative_count;
416
417 unsigned int vm_page_wire_count;
418 unsigned int vm_page_wire_count_on_boot = 0;
419 unsigned int vm_page_stolen_count = 0;
420 unsigned int vm_page_wire_count_initial;
421 unsigned int vm_page_gobble_count = 0;
422 unsigned int vm_page_kern_lpage_count = 0;
423
424 uint64_t booter_size; /* external so it can be found in core dumps */
425
426 #define VM_PAGE_WIRE_COUNT_WARNING 0
427 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
428
429 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
430 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
431 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
432
433 unsigned int vm_page_xpmapped_external_count = 0;
434 unsigned int vm_page_external_count = 0;
435 unsigned int vm_page_internal_count = 0;
436 unsigned int vm_page_pageable_external_count = 0;
437 unsigned int vm_page_pageable_internal_count = 0;
438
439 #if DEVELOPMENT || DEBUG
440 unsigned int vm_page_speculative_recreated = 0;
441 unsigned int vm_page_speculative_created = 0;
442 unsigned int vm_page_speculative_used = 0;
443 #endif
444
445 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
446
447 unsigned int vm_page_cleaned_count = 0;
448
449 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
450 ppnum_t max_valid_low_ppnum = PPNUM_MAX;
451
452
453 /*
454 * Several page replacement parameters are also
455 * shared with this module, so that page allocation
456 * (done here in vm_page_alloc) can trigger the
457 * pageout daemon.
458 */
459 unsigned int vm_page_free_target = 0;
460 unsigned int vm_page_free_min = 0;
461 unsigned int vm_page_throttle_limit = 0;
462 unsigned int vm_page_inactive_target = 0;
463 #if CONFIG_SECLUDED_MEMORY
464 unsigned int vm_page_secluded_target = 0;
465 #endif /* CONFIG_SECLUDED_MEMORY */
466 unsigned int vm_page_anonymous_min = 0;
467 unsigned int vm_page_free_reserved = 0;
468
469
470 /*
471 * The VM system has a couple of heuristics for deciding
472 * that pages are "uninteresting" and should be placed
473 * on the inactive queue as likely candidates for replacement.
474 * These variables let the heuristics be controlled at run-time
475 * to make experimentation easier.
476 */
477
478 boolean_t vm_page_deactivate_hint = TRUE;
479
480 struct vm_page_stats_reusable vm_page_stats_reusable;
481
482 /*
483 * vm_set_page_size:
484 *
485 * Sets the page size, perhaps based upon the memory
486 * size. Must be called before any use of page-size
487 * dependent functions.
488 *
489 * Sets page_shift and page_mask from page_size.
490 */
491 void
492 vm_set_page_size(void)
493 {
494 page_size = PAGE_SIZE;
495 page_mask = PAGE_MASK;
496 page_shift = PAGE_SHIFT;
497
498 if ((page_mask & page_size) != 0) {
499 panic("vm_set_page_size: page size not a power of two");
500 }
501
502 for (page_shift = 0;; page_shift++) {
503 if ((1U << page_shift) == page_size) {
504 break;
505 }
506 }
507 }
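/*
 * For example, with a 4 KB page size the loop above leaves page_shift == 12,
 * and with 16 KB pages it leaves page_shift == 14.
 */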
508
509 #if defined (__x86_64__)
510
511 #define MAX_CLUMP_SIZE 16
512 #define DEFAULT_CLUMP_SIZE 4
513
514 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
515
516 #if DEVELOPMENT || DEBUG
517 unsigned long vm_clump_stats[MAX_CLUMP_SIZE + 1];
518 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
519
520 static inline void
521 vm_clump_update_stats(unsigned int c)
522 {
523 assert(c <= vm_clump_size);
524 if (c > 0 && c <= vm_clump_size) {
525 vm_clump_stats[c] += c;
526 }
527 vm_clump_allocs += c;
528 }
529 #endif /* if DEVELOPMENT || DEBUG */
530
531 /* Called once to set up the VM clump knobs */
532 static void
533 vm_page_setup_clump( void )
534 {
535 unsigned int override, n;
536
537 vm_clump_size = DEFAULT_CLUMP_SIZE;
538 if (PE_parse_boot_argn("clump_size", &override, sizeof(override))) {
539 vm_clump_size = override;
540 }
541
542 if (vm_clump_size > MAX_CLUMP_SIZE) {
543 panic("vm_page_setup_clump:: clump_size is too large!");
544 }
545 if (vm_clump_size < 1) {
546 panic("vm_page_setup_clump:: clump_size must be >= 1");
547 }
548 if ((vm_clump_size & (vm_clump_size - 1)) != 0) {
549 panic("vm_page_setup_clump:: clump_size must be a power of 2");
550 }
551
552 vm_clump_promote_threshold = vm_clump_size;
553 vm_clump_mask = vm_clump_size - 1;
554 for (vm_clump_shift = 0, n = vm_clump_size; n > 1; n >>= 1, vm_clump_shift++) {
555 ;
556 }
557
558 #if DEVELOPMENT || DEBUG
559 bzero(vm_clump_stats, sizeof(vm_clump_stats));
560 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
561 #endif /* if DEVELOPMENT || DEBUG */
562 }
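/*
 * For illustration: with the default clump_size of 4 this leaves
 * vm_clump_mask == 3, vm_clump_shift == 2 and vm_clump_promote_threshold == 4;
 * a boot-arg of clump_size=16 (the maximum) would give a mask of 15 and a
 * shift of 4.
 */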
563
564 #endif /* #if defined (__x86_64__) */
565
566 #define COLOR_GROUPS_TO_STEAL 4
567
568 /* Called once during startup, once the cache geometry is known.
569 */
570 static void
571 vm_page_set_colors( void )
572 {
573 unsigned int n, override;
574
575 #if defined (__x86_64__)
576 /* adjust #colors because we need to color outside the clump boundary */
577 vm_cache_geometry_colors >>= vm_clump_shift;
578 #endif
579 if (PE_parse_boot_argn("colors", &override, sizeof(override))) { /* colors specified as a boot-arg? */
580 n = override;
581 } else if (vm_cache_geometry_colors) { /* do we know what the cache geometry is? */
582 n = vm_cache_geometry_colors;
583 } else {
584 n = DEFAULT_COLORS; /* use default if all else fails */
585 }
586 if (n == 0) {
587 n = 1;
588 }
589 if (n > MAX_COLORS) {
590 n = MAX_COLORS;
591 }
592
593 /* the count must be a power of 2 */
594 if ((n & (n - 1)) != 0) {
595 n = DEFAULT_COLORS; /* use default if all else fails */
596 }
597 vm_colors = n;
598 vm_color_mask = n - 1;
599
600 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
601
602 #if defined (__x86_64__)
603 /* adjust for reduction in colors due to clumping and multiple cores */
604 if (real_ncpus) {
605 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
606 }
607 #endif
608 }
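/*
 * Illustrative numbers only: if the hardware reported 32 cache colors and
 * vm_clump_shift == 2, the shift above leaves 8 usable colors, so
 * vm_colors == 8, vm_color_mask == 7 and the magazine refill limit starts
 * at 8 * COLOR_GROUPS_TO_STEAL == 32 before the x86 clump/CPU scaling.
 */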
609
610 /*
611 * During single-threaded early boot we don't initialize all pages.
612 * This avoids some delay during boot. The remaining pages are initialized and
613 * added to the free list as needed, or after we are multithreaded, by
614 * what becomes the pageout thread.
615 */
616 static boolean_t fill = FALSE;
617 static unsigned int fillval;
618 uint_t vm_delayed_count = 0; /* when non-zero, indicates we may have more pages to init */
619 ppnum_t delay_above_pnum = PPNUM_MAX;
620
621 /*
622 * On x86 the first 8 GB initializes quickly and gives us lots of lowmem + memory above it to start off with.
623 * If ARM ever uses delayed page initialization, this value may need to be quite different.
624 */
625 #define DEFAULT_DELAY_ABOVE_PHYS_GB (8)
626
627 /*
628 * When we have to dip into more delayed pages due to low memory, free up
629 * a large chunk to get things back to normal. This avoids contention on the
630 * delayed code allocating page by page.
631 */
632 #define VM_DELAY_PAGE_CHUNK ((1024 * 1024 * 1024) / PAGE_SIZE)
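/*
 * With a 4 KB page size that is 262,144 pages (1 GB worth); with 16 KB
 * pages it is 65,536.
 */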
633
634 /*
635 * Get and initialize the next delayed page.
636 */
637 static vm_page_t
638 vm_get_delayed_page(int grab_options)
639 {
640 vm_page_t p;
641 ppnum_t pnum;
642
643 /*
644 * Get a new page if we have one.
645 */
646 lck_mtx_lock(&vm_page_queue_free_lock);
647 if (vm_delayed_count == 0) {
648 lck_mtx_unlock(&vm_page_queue_free_lock);
649 return NULL;
650 }
651 if (!pmap_next_page(&pnum)) {
652 vm_delayed_count = 0;
653 lck_mtx_unlock(&vm_page_queue_free_lock);
654 return NULL;
655 }
656
657 assert(vm_delayed_count > 0);
658 --vm_delayed_count;
659
660 #if defined(__x86_64__)
661 /* x86 cluster code requires increasing phys_page in vm_pages[] */
662 if (vm_pages_count > 0) {
663 assert(pnum > vm_pages[vm_pages_count - 1].vmp_phys_page);
664 }
665 #endif
666 p = &vm_pages[vm_pages_count];
667 assert(p < vm_page_array_ending_addr);
668 vm_page_init(p, pnum, FALSE);
669 ++vm_pages_count;
670 ++vm_page_pages;
671 lck_mtx_unlock(&vm_page_queue_free_lock);
672
673 /*
674 * These pages were initially counted as wired, undo that now.
675 */
676 if (grab_options & VM_PAGE_GRAB_Q_LOCK_HELD) {
677 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
678 } else {
679 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
680 vm_page_lockspin_queues();
681 }
682 --vm_page_wire_count;
683 --vm_page_wire_count_initial;
684 if (vm_page_wire_count_on_boot != 0) {
685 --vm_page_wire_count_on_boot;
686 }
687 if (!(grab_options & VM_PAGE_GRAB_Q_LOCK_HELD)) {
688 vm_page_unlock_queues();
689 }
690
691
692 if (fill) {
693 fillPage(pnum, fillval);
694 }
695 return p;
696 }
697
698 static void vm_page_module_init_delayed(void);
699
700 /*
701 * Free all remaining delayed pages to the free lists.
702 */
703 void
704 vm_free_delayed_pages(void)
705 {
706 vm_page_t p;
707 vm_page_t list = NULL;
708 uint_t cnt = 0;
709 vm_offset_t start_free_va;
710 int64_t free_size;
711
712 while ((p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE)) != NULL) {
713 if (vm_himemory_mode) {
714 vm_page_release(p, FALSE);
715 } else {
716 p->vmp_snext = list;
717 list = p;
718 }
719 ++cnt;
720 }
721
722 /*
723 * Free the pages in reverse order if not in himemory mode,
724 * so that the low-memory pages end up first on the free lists (LIFO).
725 */
726 while (list != NULL) {
727 p = list;
728 list = p->vmp_snext;
729 p->vmp_snext = NULL;
730 vm_page_release(p, FALSE);
731 }
732 #if DEVELOPMENT || DEBUG
733 kprintf("vm_free_delayed_pages: initialized %d free pages\n", cnt);
734 #endif
735
736 /*
737 * Free up any unused full pages at the end of the vm_pages[] array
738 */
739 start_free_va = round_page((vm_offset_t)&vm_pages[vm_pages_count]);
740
741 #if defined(__x86_64__)
742 /*
743 * Since x86 might have used large pages for vm_pages[], we can't
744 * free starting in the middle of a partially used large page.
745 */
746 if (pmap_query_pagesize(kernel_pmap, start_free_va) == I386_LPGBYTES) {
747 start_free_va = ((start_free_va + I386_LPGMASK) & ~I386_LPGMASK);
748 }
749 #endif
750 if (start_free_va < (vm_offset_t)vm_page_array_ending_addr) {
751 free_size = trunc_page((vm_offset_t)vm_page_array_ending_addr - start_free_va);
752 if (free_size > 0) {
753 ml_static_mfree(start_free_va, (vm_offset_t)free_size);
754 vm_page_array_ending_addr = (void *)start_free_va;
755
756 /*
757 * Note there's no locking here, as only this thread will ever change this value.
758 * The reader, vm_page_diagnose, doesn't grab any locks for the counts it looks at.
759 */
760 vm_page_stolen_count -= (free_size >> PAGE_SHIFT);
761
762 #if DEVELOPMENT || DEBUG
763 kprintf("Freeing final unused %ld bytes from vm_pages[] at 0x%lx\n",
764 (long)free_size, (long)start_free_va);
765 #endif
766 }
767 }
768
769
770 /*
771 * now we can create the VM page array zone
772 */
773 vm_page_module_init_delayed();
774 }
775
776 /*
777 * Try to free up enough delayed pages to satisfy a contiguous memory allocation.
778 */
779 static void
780 vm_free_delayed_pages_contig(
781 uint_t npages,
782 ppnum_t max_pnum,
783 ppnum_t pnum_mask)
784 {
785 vm_page_t p;
786 ppnum_t pnum;
787 uint_t cnt = 0;
788
789 /*
790 * Treat 0 as the absolute max page number.
791 */
792 if (max_pnum == 0) {
793 max_pnum = PPNUM_MAX;
794 }
795
796 /*
797 * Free till we get a properly aligned start page
798 */
799 for (;;) {
800 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
801 if (p == NULL) {
802 return;
803 }
804 pnum = VM_PAGE_GET_PHYS_PAGE(p);
805 vm_page_release(p, FALSE);
806 if (pnum >= max_pnum) {
807 return;
808 }
809 if ((pnum & pnum_mask) == 0) {
810 break;
811 }
812 }
813
814 /*
815 * Having a healthy pool of free pages will help performance. We don't
816 * want to fall back to the delayed code for every page allocation.
817 */
818 if (vm_page_free_count < VM_DELAY_PAGE_CHUNK) {
819 npages += VM_DELAY_PAGE_CHUNK;
820 }
821
822 /*
823 * Now free up the pages
824 */
825 for (cnt = 1; cnt < npages; ++cnt) {
826 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
827 if (p == NULL) {
828 return;
829 }
830 vm_page_release(p, FALSE);
831 }
832 }
833
834
835 lck_grp_t vm_page_lck_grp_free;
836 lck_grp_t vm_page_lck_grp_queue;
837 lck_grp_t vm_page_lck_grp_local;
838 lck_grp_t vm_page_lck_grp_purge;
839 lck_grp_t vm_page_lck_grp_alloc;
840 lck_grp_t vm_page_lck_grp_bucket;
841 lck_grp_attr_t vm_page_lck_grp_attr;
842 lck_attr_t vm_page_lck_attr;
843
844
845 __private_extern__ void
846 vm_page_init_lck_grp(void)
847 {
848 /*
849 * initialize the vm_page lock world
850 */
851 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
852 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
853 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
854 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
855 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
856 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
857 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
858 lck_attr_setdefault(&vm_page_lck_attr);
859 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
860
861 vm_compressor_init_locks();
862 }
863
864 #define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
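/*
 * Rounds a size (> 1) up to the next power of two: ROUNDUP_NEXTP2(24) == 32,
 * and a value that is already a power of two is returned unchanged. Used
 * below to size the kalloc() of the per-CPU local queue array.
 */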
865
866 void
867 vm_page_init_local_q()
868 {
869 unsigned int num_cpus;
870 unsigned int i;
871 struct vplq *t_local_q;
872
873 num_cpus = ml_get_max_cpus();
874
875 /*
876 * no point in this for a uni-processor system
877 */
878 if (num_cpus >= 2) {
879 #if KASAN
880 /* KASAN breaks the expectation of a size-aligned object by adding a
881 * redzone, so explicitly align. */
882 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
883 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT - 1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1));
884 #else
885 /* round the size up to the nearest power of two */
886 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
887 #endif
888
889 for (i = 0; i < num_cpus; i++) {
890 struct vpl *lq;
891
892 lq = &t_local_q[i].vpl_un.vpl;
893 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
894 vm_page_queue_init(&lq->vpl_queue);
895 lq->vpl_count = 0;
896 lq->vpl_internal_count = 0;
897 lq->vpl_external_count = 0;
898 }
899 vm_page_local_q_count = num_cpus;
900
901 vm_page_local_q = (struct vplq *)t_local_q;
902 }
903 }
904
905 /*
906 * vm_init_before_launchd
907 *
908 * This should be called right before launchd is loaded.
909 */
910 void
911 vm_init_before_launchd()
912 {
913 vm_page_lockspin_queues();
914 vm_page_wire_count_on_boot = vm_page_wire_count;
915 vm_page_unlock_queues();
916 }
917
918
919 /*
920 * vm_page_bootstrap:
921 *
922 * Initializes the resident memory module.
923 *
924 * Allocates memory for the page cells, and
925 * for the object/offset-to-page hash table headers.
926 * Each page cell is initialized and placed on the free list.
927 * Returns the range of available kernel virtual memory.
928 */
929
930 void
931 vm_page_bootstrap(
932 vm_offset_t *startp,
933 vm_offset_t *endp)
934 {
935 unsigned int i;
936 unsigned int log1;
937 unsigned int log2;
938 unsigned int size;
939
940 /*
941 * Initialize the page queues.
942 */
943 vm_page_init_lck_grp();
944
945 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
946 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
947 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
948
949 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
950 int group;
951
952 purgeable_queues[i].token_q_head = 0;
953 purgeable_queues[i].token_q_tail = 0;
954 for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
955 queue_init(&purgeable_queues[i].objq[group]);
956 }
957
958 purgeable_queues[i].type = i;
959 purgeable_queues[i].new_pages = 0;
960 #if MACH_ASSERT
961 purgeable_queues[i].debug_count_tokens = 0;
962 purgeable_queues[i].debug_count_objects = 0;
963 #endif
964 }
965 ;
966 purgeable_nonvolatile_count = 0;
967 queue_init(&purgeable_nonvolatile_queue);
968
969 for (i = 0; i < MAX_COLORS; i++) {
970 vm_page_queue_init(&vm_page_queue_free[i].qhead);
971 }
972
973 vm_page_queue_init(&vm_lopage_queue_free);
974 vm_page_queue_init(&vm_page_queue_active);
975 vm_page_queue_init(&vm_page_queue_inactive);
976 #if CONFIG_SECLUDED_MEMORY
977 vm_page_queue_init(&vm_page_queue_secluded);
978 #endif /* CONFIG_SECLUDED_MEMORY */
979 vm_page_queue_init(&vm_page_queue_cleaned);
980 vm_page_queue_init(&vm_page_queue_throttled);
981 vm_page_queue_init(&vm_page_queue_anonymous);
982 queue_init(&vm_objects_wired);
983
984 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
985 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
986
987 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
988 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
989 }
990 #if CONFIG_BACKGROUND_QUEUE
991 vm_page_queue_init(&vm_page_queue_background);
992
993 vm_page_background_count = 0;
994 vm_page_background_internal_count = 0;
995 vm_page_background_external_count = 0;
996 vm_page_background_promoted_count = 0;
997
998 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
999
1000 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX) {
1001 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
1002 }
1003
1004 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
1005 vm_page_background_exclude_external = 0;
1006
1007 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
1008 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
1009 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
1010
1011 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1) {
1012 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
1013 }
1014 #endif
1015 vm_page_free_wanted = 0;
1016 vm_page_free_wanted_privileged = 0;
1017 #if CONFIG_SECLUDED_MEMORY
1018 vm_page_free_wanted_secluded = 0;
1019 #endif /* CONFIG_SECLUDED_MEMORY */
1020
1021 #if defined (__x86_64__)
1022 /* this must be called before vm_page_set_colors() */
1023 vm_page_setup_clump();
1024 #endif
1025
1026 vm_page_set_colors();
1027
1028 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
1029 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1030 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1031 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1032
1033 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
1034 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1035 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1036 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1037 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1038 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
1039 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1040 #if CONFIG_SECLUDED_MEMORY
1041 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1042 #endif /* CONFIG_SECLUDED_MEMORY */
1043
1044 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
1045 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1046 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1047 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1048 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1049 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1050 #if CONFIG_SECLUDED_MEMORY
1051 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1052 #endif /* CONFIG_SECLUDED_MEMORY */
1053
1054 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
1055 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1056 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1057 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1058 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1059 #if CONFIG_SECLUDED_MEMORY
1060 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1061 #endif /* CONFIG_SECLUDED_MEMORY */
1062
1063 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++) {
1064 vm_allocation_sites_static[i].refcount = 2;
1065 vm_allocation_sites_static[i].tag = i;
1066 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
1067 }
1068 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
1069 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
1070 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
1071
1072 /*
1073 * Steal memory for the map and zone subsystems.
1074 */
1075 #if CONFIG_GZALLOC
1076 gzalloc_configure();
1077 #endif
1078 kernel_debug_string_early("vm_map_steal_memory");
1079 vm_map_steal_memory();
1080
1081 /*
1082 * Allocate (and initialize) the virtual-to-physical
1083 * table hash buckets.
1084 *
1085 * The number of buckets should be a power of two to
1086 * get a good hash function. The following computation
1087 * chooses the first power of two that is greater
1088 * than the number of physical pages in the system.
1089 */
1090
1091 if (vm_page_bucket_count == 0) {
1092 unsigned int npages = pmap_free_pages();
1093
1094 vm_page_bucket_count = 1;
1095 while (vm_page_bucket_count < npages) {
1096 vm_page_bucket_count <<= 1;
1097 }
1098 }
1099 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
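/*
 * For example, a machine with roughly 1.5 million free physical pages ends
 * up with 2^21 (2,097,152) hash buckets and, at 16 buckets per lock,
 * 131,072 bucket locks.
 */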
1100
1101 vm_page_hash_mask = vm_page_bucket_count - 1;
1102
1103 /*
1104 * Calculate object shift value for hashing algorithm:
1105 * O = log2(sizeof(struct vm_object))
1106 * B = log2(vm_page_bucket_count)
1107 * hash shifts the object left by
1108 * B/2 - O
1109 */
1110 size = vm_page_bucket_count;
1111 for (log1 = 0; size > 1; log1++) {
1112 size /= 2;
1113 }
1114 size = sizeof(struct vm_object);
1115 for (log2 = 0; size > 1; log2++) {
1116 size /= 2;
1117 }
1118 vm_page_hash_shift = log1 / 2 - log2 + 1;
1119
1120 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
1121 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
1122 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure a unique series */
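/*
 * Worked example: for a bucket count of 2^18, log1 == 18, so the hash
 * constant gets bits 9 and 4 set plus the low bit, i.e. 0x211.
 */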
1123
1124 if (vm_page_hash_mask & vm_page_bucket_count) {
1125 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
1126 }
1127
1128 #if VM_PAGE_BUCKETS_CHECK
1129 #if VM_PAGE_FAKE_BUCKETS
1130 /*
1131 * Allocate a decoy set of page buckets, to detect
1132 * any stomping there.
1133 */
1134 vm_page_fake_buckets = (vm_page_bucket_t *)
1135 pmap_steal_memory(vm_page_bucket_count *
1136 sizeof(vm_page_bucket_t));
1137 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
1138 vm_page_fake_buckets_end =
1139 vm_map_round_page((vm_page_fake_buckets_start +
1140 (vm_page_bucket_count *
1141 sizeof(vm_page_bucket_t))),
1142 PAGE_MASK);
1143 char *cp;
1144 for (cp = (char *)vm_page_fake_buckets_start;
1145 cp < (char *)vm_page_fake_buckets_end;
1146 cp++) {
1147 *cp = 0x5a;
1148 }
1149 #endif /* VM_PAGE_FAKE_BUCKETS */
1150 #endif /* VM_PAGE_BUCKETS_CHECK */
1151
1152 kernel_debug_string_early("vm_page_buckets");
1153 vm_page_buckets = (vm_page_bucket_t *)
1154 pmap_steal_memory(vm_page_bucket_count *
1155 sizeof(vm_page_bucket_t));
1156
1157 kernel_debug_string_early("vm_page_bucket_locks");
1158 vm_page_bucket_locks = (lck_spin_t *)
1159 pmap_steal_memory(vm_page_bucket_lock_count *
1160 sizeof(lck_spin_t));
1161
1162 for (i = 0; i < vm_page_bucket_count; i++) {
1163 vm_page_bucket_t *bucket = &vm_page_buckets[i];
1164
1165 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1166 #if MACH_PAGE_HASH_STATS
1167 bucket->cur_count = 0;
1168 bucket->hi_count = 0;
1169 #endif /* MACH_PAGE_HASH_STATS */
1170 }
1171
1172 for (i = 0; i < vm_page_bucket_lock_count; i++) {
1173 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
1174 }
1175
1176 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
1177 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
1178 vm_tag_init();
1179
1180 #if VM_PAGE_BUCKETS_CHECK
1181 vm_page_buckets_check_ready = TRUE;
1182 #endif /* VM_PAGE_BUCKETS_CHECK */
1183
1184 /*
1185 * Machine-dependent code allocates the resident page table.
1186 * It uses vm_page_init to initialize the page frames.
1187 * The code also returns to us the virtual space available
1188 * to the kernel. We don't trust the pmap module
1189 * to get the alignment right.
1190 */
1191
1192 kernel_debug_string_early("pmap_startup");
1193 pmap_startup(&virtual_space_start, &virtual_space_end);
1194 virtual_space_start = round_page(virtual_space_start);
1195 virtual_space_end = trunc_page(virtual_space_end);
1196
1197 *startp = virtual_space_start;
1198 *endp = virtual_space_end;
1199
1200 /*
1201 * Compute the initial "wire" count.
1202 * Up until now, the pages which have been set aside are not under
1203 * the VM system's control, so although they aren't explicitly
1204 * wired, they nonetheless can't be moved. At this moment,
1205 * all VM managed pages are "free", courtesy of pmap_startup.
1206 */
1207 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1208 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) -
1209 vm_page_free_count - vm_lopage_free_count;
1210 #if CONFIG_SECLUDED_MEMORY
1211 vm_page_wire_count -= vm_page_secluded_count;
1212 #endif
1213 vm_page_wire_count_initial = vm_page_wire_count;
1214
1215 /* capture this for later use */
1216 booter_size = ml_get_booter_memory_size();
1217
1218 printf("vm_page_bootstrap: %d free pages, %d wired pages, (up to %d of which are delayed free)\n",
1219 vm_page_free_count, vm_page_wire_count, vm_delayed_count);
1220
1221 kernel_debug_string_early("vm_page_bootstrap complete");
1222 simple_lock_init(&vm_paging_lock, 0);
1223 }
1224
1225 #ifndef MACHINE_PAGES
1226 /*
1227 * This is the early boot time allocator for data structures needed to bootstrap the VM system.
1228 * On x86 it will allocate large pages if size is sufficiently large. We don't need to do this
1229 * on ARM yet, due to the combination of a large base page size and smaller RAM devices.
1230 */
1231 static void *
1232 pmap_steal_memory_internal(
1233 vm_size_t size,
1234 boolean_t might_free)
1235 {
1236 kern_return_t kr;
1237 vm_offset_t addr;
1238 vm_offset_t map_addr;
1239 ppnum_t phys_page;
1240
1241 /*
1242 * Size needs to be aligned to word size.
1243 */
1244 size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
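/*
 * e.g. on LP64, sizeof(void *) == 8, so a 12-byte request is rounded up
 * to 16 bytes here.
 */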
1245
1246 /*
1247 * On the first call, get the initial values for virtual address space
1248 * and page align them.
1249 */
1250 if (virtual_space_start == virtual_space_end) {
1251 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1252 virtual_space_start = round_page(virtual_space_start);
1253 virtual_space_end = trunc_page(virtual_space_end);
1254
1255 #if defined(__x86_64__)
1256 /*
1257 * Release remaining unused section of preallocated KVA and the 4K page tables
1258 * that map it. This makes the VA available for large page mappings.
1259 */
1260 Idle_PTs_release(virtual_space_start, virtual_space_end);
1261 #endif
1262 }
1263
1264 /*
1265 * Allocate the virtual space for this request. On x86, we'll align to a large page
1266 * address if the size is big enough to back with at least 1 large page.
1267 */
1268 #if defined(__x86_64__)
1269 if (size >= I386_LPGBYTES) {
1270 virtual_space_start = ((virtual_space_start + I386_LPGMASK) & ~I386_LPGMASK);
1271 }
1272 #endif
1273 addr = virtual_space_start;
1274 virtual_space_start += size;
1275
1276 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1277
1278 /*
1279 * Allocate and map physical pages to back the new virtual space.
1280 */
1281 map_addr = round_page(addr);
1282 while (map_addr < addr + size) {
1283 #if defined(__x86_64__)
1284 /*
1285 * Back with a large page if properly aligned on x86
1286 */
1287 if ((map_addr & I386_LPGMASK) == 0 &&
1288 map_addr + I386_LPGBYTES <= addr + size &&
1289 pmap_pre_expand_large(kernel_pmap, map_addr) == KERN_SUCCESS &&
1290 pmap_next_page_large(&phys_page) == KERN_SUCCESS) {
1291 kr = pmap_enter(kernel_pmap, map_addr, phys_page,
1292 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
1293 VM_WIMG_USE_DEFAULT | VM_MEM_SUPERPAGE, FALSE);
1294
1295 if (kr != KERN_SUCCESS) {
1296 panic("pmap_steal_memory: pmap_enter() large failed, new_addr=%#lx, phys_page=%u",
1297 (unsigned long)map_addr, phys_page);
1298 }
1299 map_addr += I386_LPGBYTES;
1300 vm_page_wire_count += I386_LPGBYTES >> PAGE_SHIFT;
1301 vm_page_stolen_count += I386_LPGBYTES >> PAGE_SHIFT;
1302 vm_page_kern_lpage_count++;
1303 continue;
1304 }
1305 #endif
1306
1307 if (!pmap_next_page_hi(&phys_page, might_free)) {
1308 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1309 }
1310
1311 #if defined(__x86_64__)
1312 pmap_pre_expand(kernel_pmap, map_addr);
1313 #endif
1314
1315 kr = pmap_enter(kernel_pmap, map_addr, phys_page,
1316 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
1317 VM_WIMG_USE_DEFAULT, FALSE);
1318
1319 if (kr != KERN_SUCCESS) {
1320 panic("pmap_steal_memory() pmap_enter failed, map_addr=%#lx, phys_page=%u",
1321 (unsigned long)map_addr, phys_page);
1322 }
1323 map_addr += PAGE_SIZE;
1324
1325 /*
1326 * Account for newly stolen memory
1327 */
1328 vm_page_wire_count++;
1329 vm_page_stolen_count++;
1330 }
1331
1332 #if defined(__x86_64__)
1333 /*
1334 * The call with might_free is currently the last use of pmap_steal_memory*().
1335 * Notify the pmap layer to record which high pages were allocated so far.
1336 */
1337 if (might_free) {
1338 pmap_hi_pages_done();
1339 }
1340 #endif
1341 #if KASAN
1342 kasan_notify_address(round_page(addr), size);
1343 #endif
1344 return (void *) addr;
1345 }
1346
1347 void *
1348 pmap_steal_memory(
1349 vm_size_t size)
1350 {
1351 return pmap_steal_memory_internal(size, FALSE);
1352 }
1353
1354 void *
1355 pmap_steal_freeable_memory(
1356 vm_size_t size)
1357 {
1358 return pmap_steal_memory_internal(size, TRUE);
1359 }
1360
1361 #if CONFIG_SECLUDED_MEMORY
1362 /* boot-args to control secluded memory */
1363 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1364 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1365 int secluded_for_apps = 1; /* apps can use secluded memory */
1366 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1367 #if 11
1368 int secluded_for_fbdp = 0;
1369 #endif
1370 uint64_t secluded_shutoff_trigger = 0;
1371 #endif /* CONFIG_SECLUDED_MEMORY */
1372
1373
1374 #if defined(__arm__) || defined(__arm64__)
1375 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1376 unsigned int vm_first_phys_ppnum = 0;
1377 #endif
1378
1379 void vm_page_release_startup(vm_page_t mem);
1380 void
1381 pmap_startup(
1382 vm_offset_t *startp,
1383 vm_offset_t *endp)
1384 {
1385 unsigned int i, npages;
1386 ppnum_t phys_page;
1387 uint64_t mem_sz;
1388 uint64_t start_ns;
1389 uint64_t now_ns;
1390 uint_t low_page_count = 0;
1391
1392 #if defined(__LP64__)
1393 /*
1394 * make sure we are aligned on a 64 byte boundary
1395 * for VM_PAGE_PACK_PTR (it clips off the low-order
1396 * 6 bits of the pointer)
1397 */
1398 if (virtual_space_start != virtual_space_end) {
1399 virtual_space_start = round_page(virtual_space_start);
1400 }
1401 #endif
1402
1403 /*
1404 * We calculate how many page frames we will have
1405 * and then allocate the page structures in one chunk.
1406 *
1407 * Note that the calculation here doesn't take into account
1408 * the memory needed to map what's being allocated, i.e. the page
1409 * table entries. So the actual number of pages we get will be
1410 * less than this. To do someday: include that in the computation.
1411 */
1412 mem_sz = pmap_free_pages() * (uint64_t)PAGE_SIZE;
1413 mem_sz += round_page(virtual_space_start) - virtual_space_start; /* Account for any slop */
1414 npages = (uint_t)(mem_sz / (PAGE_SIZE + sizeof(*vm_pages))); /* scaled to include the vm_page_ts */
1415
1416 vm_pages = (vm_page_t) pmap_steal_freeable_memory(npages * sizeof *vm_pages);
1417
1418 /*
1419 * Check if we want to initialize pages to a known value
1420 */
1421 if (PE_parse_boot_argn("fill", &fillval, sizeof(fillval))) {
1422 fill = TRUE;
1423 }
1424 #if DEBUG
1425 /* This slows down booting the DEBUG kernel, particularly on
1426 * large memory systems, but is worthwhile in deterministically
1427 * trapping uninitialized memory usage.
1428 */
1429 if (!fill) {
1430 fill = TRUE;
1431 fillval = 0xDEB8F177;
1432 }
1433 #endif
1434 if (fill) {
1435 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1436 }
1437
1438 #if CONFIG_SECLUDED_MEMORY
1439 /*
1440 * Figure out how much secluded memory to have before we start
1441 * releasing pages to the free lists.
1442 * The default, if specified nowhere else, is no secluded mem.
1443 */
1444 secluded_mem_mb = 0;
1445 if (max_mem > 1 * 1024 * 1024 * 1024) {
1446 /* default to 90MB for devices with > 1GB of RAM */
1447 secluded_mem_mb = 90;
1448 }
1449 /* override with value from device tree, if provided */
1450 PE_get_default("kern.secluded_mem_mb",
1451 &secluded_mem_mb, sizeof(secluded_mem_mb));
1452 /* override with value from boot-args, if provided */
1453 PE_parse_boot_argn("secluded_mem_mb",
1454 &secluded_mem_mb,
1455 sizeof(secluded_mem_mb));
1456
1457 vm_page_secluded_target = (unsigned int)
1458 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
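/*
 * For example, the 90 MB default works out to 23,040 secluded pages with
 * 4 KB pages, or 5,760 pages with 16 KB pages.
 */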
1459 PE_parse_boot_argn("secluded_for_iokit",
1460 &secluded_for_iokit,
1461 sizeof(secluded_for_iokit));
1462 PE_parse_boot_argn("secluded_for_apps",
1463 &secluded_for_apps,
1464 sizeof(secluded_for_apps));
1465 PE_parse_boot_argn("secluded_for_filecache",
1466 &secluded_for_filecache,
1467 sizeof(secluded_for_filecache));
1468 #if 11
1469 PE_parse_boot_argn("secluded_for_fbdp",
1470 &secluded_for_fbdp,
1471 sizeof(secluded_for_fbdp));
1472 #endif
1473
1474 /*
1475 * On small devices, allow a large app to effectively suppress
1476 * secluded memory until it exits.
1477 */
1478 if (max_mem <= 1 * 1024 * 1024 * 1024 && vm_page_secluded_target != 0) {
1479 /*
1480 * Get an amount from boot-args, else use 500MB.
1481 * 500MB was chosen from a Peace daemon tentpole test which used munch
1482 * to induce jetsam thrashing of false idle daemons.
1483 */
1484 int secluded_shutoff_mb;
1485 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
1486 sizeof(secluded_shutoff_mb))) {
1487 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
1488 } else {
1489 secluded_shutoff_trigger = 500 * 1024 * 1024;
1490 }
1491
1492 if (secluded_shutoff_trigger != 0) {
1493 secluded_suppression_init();
1494 }
1495 }
1496
1497 #endif /* CONFIG_SECLUDED_MEMORY */
1498
1499 #if defined(__x86_64__)
1500
1501 /*
1502 * Decide how much memory we delay freeing at boot time.
1503 */
1504 uint32_t delay_above_gb;
1505 if (!PE_parse_boot_argn("delay_above_gb", &delay_above_gb, sizeof(delay_above_gb))) {
1506 delay_above_gb = DEFAULT_DELAY_ABOVE_PHYS_GB;
1507 }
1508
1509 if (delay_above_gb == 0) {
1510 delay_above_pnum = PPNUM_MAX;
1511 } else {
1512 delay_above_pnum = delay_above_gb * (1024 * 1024 * 1024 / PAGE_SIZE);
1513 }
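/*
 * With the default of 8 GB and 4 KB pages, delay_above_pnum becomes
 * 8 * 262,144 == 2,097,152 (0x200000).
 */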
1514
1515 /* make sure we have sane breathing room: 1G above low memory */
1516 if (delay_above_pnum <= max_valid_low_ppnum) {
1517 delay_above_pnum = max_valid_low_ppnum + ((1024 * 1024 * 1024) >> PAGE_SHIFT);
1518 }
1519
1520 if (delay_above_pnum < PPNUM_MAX) {
1521 printf("pmap_startup() delaying init/free of page nums > 0x%x\n", delay_above_pnum);
1522 }
1523
1524 #endif /* defined(__x86_64__) */
1525
1526 /*
1527 * Initialize and release the page frames.
1528 */
1529 kernel_debug_string_early("Initialize and free the page frames");
1530
1531 vm_page_array_beginning_addr = &vm_pages[0];
1532 vm_page_array_ending_addr = &vm_pages[npages]; /* used by ptr packing/unpacking code */
1533
1534 vm_delayed_count = 0;
1535
1536 absolutetime_to_nanoseconds(mach_absolute_time(), &start_ns);
1537 vm_pages_count = 0;
1538 for (i = 0; i < npages; i++) {
1539 /* Did we run out of pages? */
1540 if (!pmap_next_page(&phys_page)) {
1541 break;
1542 }
1543
1544 if (phys_page < max_valid_low_ppnum) {
1545 ++low_page_count;
1546 }
1547
1548 /* Are we at high enough pages to delay the rest? */
1549 if (low_page_count > vm_lopage_free_limit && phys_page > delay_above_pnum) {
1550 vm_delayed_count = pmap_free_pages();
1551 break;
1552 }
1553
1554 #if defined(__arm__) || defined(__arm64__)
1555 if (i == 0) {
1556 vm_first_phys_ppnum = phys_page;
1557 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr,
1558 (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1559 }
1560 assert((i + vm_first_phys_ppnum) == phys_page);
1561 #endif
1562
1563 #if defined(__x86_64__)
1564 /* The x86 clump freeing code requires increasing ppn's to work correctly */
1565 if (i > 0) {
1566 assert(phys_page > vm_pages[i - 1].vmp_phys_page);
1567 }
1568 #endif
1569 ++vm_pages_count;
1570 vm_page_init(&vm_pages[i], phys_page, FALSE);
1571 if (fill) {
1572 fillPage(phys_page, fillval);
1573 }
1574 if (vm_himemory_mode) {
1575 vm_page_release_startup(&vm_pages[i]);
1576 }
1577 }
1578 vm_page_pages = vm_pages_count; /* used to report to user space */
1579
1580 if (!vm_himemory_mode) {
1581 do {
1582 vm_page_release_startup(&vm_pages[--i]);
1583 } while (i != 0);
1584 }
1585
1586 absolutetime_to_nanoseconds(mach_absolute_time(), &now_ns);
1587 printf("pmap_startup() init/release time: %lld microsec\n", (now_ns - start_ns) / NSEC_PER_USEC);
1588 printf("pmap_startup() delayed init/release of %d pages\n", vm_delayed_count);
1589
1590 #if defined(__LP64__)
1591
1592 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0]) {
1593 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1594 }
1595
1596 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count - 1]))) != &vm_pages[vm_pages_count - 1]) {
1597 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count - 1]);
1598 }
1599 #endif
1600
1601 VM_CHECK_MEMORYSTATUS;
1602
1603 /*
1604 * We have to re-align virtual_space_start,
1605 * because pmap_steal_memory has been using it.
1606 */
1607 virtual_space_start = round_page(virtual_space_start);
1608 *startp = virtual_space_start;
1609 *endp = virtual_space_end;
1610 }
1611 #endif /* MACHINE_PAGES */
1612
1613 /*
1614 * Create the zone that represents the vm_pages[] array. Nothing ever allocates from
1615 * or frees to this zone. It's just here for reporting purposes via the zprint command.
1616 * This needs to be done after all initially delayed pages are put on the free lists.
1617 */
1618 static void
1619 vm_page_module_init_delayed(void)
1620 {
1621 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1622
1623 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1624 0, PAGE_SIZE, "vm pages array");
1625
1626 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1627 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1628 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1629 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1630 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1631
1632 /*
1633 * Reflect size and usage information for vm_pages[].
1634 */
1635 vm_page_array_zone->count = vm_pages_count;
1636 vm_page_array_zone->countfree = (int)(vm_page_array_ending_addr - &vm_pages[vm_pages_count]);
1637 vm_page_array_zone->sum_count = vm_pages_count;
1638 vm_page_array_zone_data_size = (uintptr_t)((void *)vm_page_array_ending_addr - (void *)vm_pages);
1639 vm_page_array_zone->cur_size = vm_page_array_zone_data_size;
1640 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1641 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1642 /* since zone accounts for these, take them out of stolen */
1643 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1644 }
1645
1646 /*
1647 * Create the vm_pages zone. This is used for the vm_page structures for the pages
1648 * that are scavenged from other boot-time usages by ml_static_mfree(). As such,
1649 * this needs to happen in early VM bootstrap.
1650 */
1651 void
1652 vm_page_module_init(void)
1653 {
1654 vm_size_t vm_page_with_ppnum_size;
1655
1656 /*
1657 * Since the pointers to elements in this zone will be packed, they
1658 * must have an appropriate size, not strictly what sizeof() reports.
1659 */
1660 vm_page_with_ppnum_size =
1661 (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT - 1)) &
1662 ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1663
1664 vm_page_zone = zinit(vm_page_with_ppnum_size, 0, PAGE_SIZE, "vm pages");
1665
1666 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1667 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1668 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1669 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1670 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1671 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1672 }
1673
1674 /*
1675 * Routine: vm_page_create
1676 * Purpose:
1677 * After the VM system is up, machine-dependent code
1678 * may stumble across more physical memory, for example
1679 * memory that it was reserving for a frame buffer.
1680 * vm_page_create turns this memory into available pages.
1681 */
1682
1683 void
1684 vm_page_create(
1685 ppnum_t start,
1686 ppnum_t end)
1687 {
1688 ppnum_t phys_page;
1689 vm_page_t m;
1690
1691 for (phys_page = start;
1692 phys_page < end;
1693 phys_page++) {
1694 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1695 == VM_PAGE_NULL) {
1696 vm_page_more_fictitious();
1697 }
1698
1699 m->vmp_fictitious = FALSE;
1700 pmap_clear_noencrypt(phys_page);
1701
1702 lck_mtx_lock(&vm_page_queue_free_lock);
1703 vm_page_pages++;
1704 lck_mtx_unlock(&vm_page_queue_free_lock);
1705 vm_page_release(m, FALSE);
1706 }
1707 }
1708
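/*
 * A hedged usage sketch (the function name and page numbers are hypothetical):
 * platform code that discovers it no longer needs a reserved physical range
 * hands the page numbers to vm_page_create(); the end argument is exclusive.
 */
#if 0 /* illustrative only, not compiled */
static void
example_return_framebuffer_range(void)
{
	ppnum_t first = 0x80000;        /* hypothetical first page of the range */
	ppnum_t end   = 0x80400;        /* hypothetical page just past the range */

	vm_page_create(first, end);     /* pages become available to the VM */
}
#endif
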
1709 /*
1710 * vm_page_hash:
1711 *
1712 * Distributes the object/offset key pair among hash buckets.
1713 *
1714 * NOTE: The bucket count must be a power of 2
1715 */
1716 #define vm_page_hash(object, offset) (\
1717 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1718 & vm_page_hash_mask)
1719
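/*
 * A brief sketch of how the hash is consumed elsewhere in this file: the
 * masked value selects a bucket, and buckets share spin locks in groups of
 * BUCKETS_PER_LOCK. This assumes the usual power-of-two bucket count, so
 * masking with vm_page_hash_mask behaves like a modulo by the bucket count.
 * The wrapper function is made up for illustration only.
 */
#if 0 /* illustrative only, not compiled */
static void
example_hash_to_bucket(vm_object_t object, vm_object_offset_t offset)
{
	int              hash_id;
	vm_page_bucket_t *bucket;
	lck_spin_t       *bucket_lock;

	hash_id     = vm_page_hash(object, offset);
	bucket      = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	/* the bucket is then searched or modified under bucket_lock */
	(void)bucket;
	(void)bucket_lock;
}
#endif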
1720
1721 /*
1722 * vm_page_insert: [ internal use only ]
1723 *
1724 * Inserts the given mem entry into the object/object-page
1725 * table and object list.
1726 *
1727 * The object must be locked.
1728 */
1729 void
1730 vm_page_insert(
1731 vm_page_t mem,
1732 vm_object_t object,
1733 vm_object_offset_t offset)
1734 {
1735 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1736 }
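
/*
 * A hedged usage sketch: a caller grabs a page, takes the object lock
 * exclusively, and inserts the page at a page-aligned offset. The function
 * name and error handling are illustrative only.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_insert_one_page(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t m;

	m = vm_page_grab();
	if (m == VM_PAGE_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	vm_object_lock(object);                 /* exclusive lock required */
	vm_page_insert(m, object, offset);      /* offset must be page aligned */
	vm_object_unlock(object);

	/* the page is still marked busy; the caller clears that when ready */
	return KERN_SUCCESS;
}
#endif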
1737
1738 void
1739 vm_page_insert_wired(
1740 vm_page_t mem,
1741 vm_object_t object,
1742 vm_object_offset_t offset,
1743 vm_tag_t tag)
1744 {
1745 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1746 }
1747
1748 void
1749 vm_page_insert_internal(
1750 vm_page_t mem,
1751 vm_object_t object,
1752 vm_object_offset_t offset,
1753 vm_tag_t tag,
1754 boolean_t queues_lock_held,
1755 boolean_t insert_in_hash,
1756 boolean_t batch_pmap_op,
1757 boolean_t batch_accounting,
1758 uint64_t *delayed_ledger_update)
1759 {
1760 vm_page_bucket_t *bucket;
1761 lck_spin_t *bucket_lock;
1762 int hash_id;
1763 task_t owner;
1764 int ledger_idx_volatile;
1765 int ledger_idx_nonvolatile;
1766 int ledger_idx_volatile_compressed;
1767 int ledger_idx_nonvolatile_compressed;
1768 boolean_t do_footprint;
1769
1770 #if 0
1771 /*
1772 * we may not hold the page queue lock
1773 * so this check isn't safe to make
1774 */
1775 VM_PAGE_CHECK(mem);
1776 #endif
1777
1778 assert(page_aligned(offset));
1779
1780 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
1781
1782 /* the vm_submap_object is only a placeholder for submaps */
1783 assert(object != vm_submap_object);
1784
1785 vm_object_lock_assert_exclusive(object);
1786 LCK_MTX_ASSERT(&vm_page_queue_lock,
1787 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1788 : LCK_MTX_ASSERT_NOTOWNED);
1789
1790 if (queues_lock_held == FALSE) {
1791 assert(!VM_PAGE_PAGEABLE(mem));
1792 }
1793
1794 if (insert_in_hash == TRUE) {
1795 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1796 if (mem->vmp_tabled || mem->vmp_object) {
1797 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1798 "already in (obj=%p,off=0x%llx)",
1799 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1800 }
1801 #endif
1802 if (object->internal && (offset >= object->vo_size)) {
1803 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1804 mem, object, offset, object->vo_size);
1805 }
1806
1807 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1808
1809 /*
1810 * Record the object/offset pair in this page
1811 */
1812
1813 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1814 mem->vmp_offset = offset;
1815
1816 #if CONFIG_SECLUDED_MEMORY
1817 if (object->eligible_for_secluded) {
1818 vm_page_secluded.eligible_for_secluded++;
1819 }
1820 #endif /* CONFIG_SECLUDED_MEMORY */
1821
1822 /*
1823 * Insert it into the object/offset hash table
1824 */
1825 hash_id = vm_page_hash(object, offset);
1826 bucket = &vm_page_buckets[hash_id];
1827 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1828
1829 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
1830
1831 mem->vmp_next_m = bucket->page_list;
1832 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1833 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1834
1835 #if MACH_PAGE_HASH_STATS
1836 if (++bucket->cur_count > bucket->hi_count) {
1837 bucket->hi_count = bucket->cur_count;
1838 }
1839 #endif /* MACH_PAGE_HASH_STATS */
1840 mem->vmp_hashed = TRUE;
1841 lck_spin_unlock(bucket_lock);
1842 }
1843
1844 {
1845 unsigned int cache_attr;
1846
1847 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1848
1849 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1850 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1851 }
1852 }
1853 /*
1854 * Now link into the object's list of backed pages.
1855 */
1856 vm_page_queue_enter(&object->memq, mem, vmp_listq);
1857 object->memq_hint = mem;
1858 mem->vmp_tabled = TRUE;
1859
1860 /*
1861 * Show that the object has one more resident page.
1862 */
1863
1864 object->resident_page_count++;
1865 if (VM_PAGE_WIRED(mem)) {
1866 assert(mem->vmp_wire_count > 0);
1867 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1868 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1869 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1870 }
1871 assert(object->resident_page_count >= object->wired_page_count);
1872
1873 if (batch_accounting == FALSE) {
1874 if (object->internal) {
1875 OSAddAtomic(1, &vm_page_internal_count);
1876 } else {
1877 OSAddAtomic(1, &vm_page_external_count);
1878 }
1879 }
1880
1881 /*
1882 * It wouldn't make sense to insert a "reusable" page in
1883 * an object (the page would have been marked "reusable" only
1884 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1885 * in the object at that time).
1886 * But a page could be inserted into an "all_reusable" object, if
1887 * something faults it in (a vm_read() from another task or a
1888 * "use-after-free" issue in user space, for example). It can
1889 * also happen if we're relocating a page from that object to
1890 * a different physical page during a physically-contiguous
1891 * allocation.
1892 */
1893 assert(!mem->vmp_reusable);
1894 if (object->all_reusable) {
1895 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1896 }
1897
1898 if (object->purgable == VM_PURGABLE_DENY &&
1899 !object->vo_ledger_tag) {
1900 owner = TASK_NULL;
1901 } else {
1902 owner = VM_OBJECT_OWNER(object);
1903 vm_object_ledger_tag_ledgers(object,
1904 &ledger_idx_volatile,
1905 &ledger_idx_nonvolatile,
1906 &ledger_idx_volatile_compressed,
1907 &ledger_idx_nonvolatile_compressed,
1908 &do_footprint);
1909 }
1910 if (owner &&
1911 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1912 object->purgable == VM_PURGABLE_DENY ||
1913 VM_PAGE_WIRED(mem))) {
1914 if (delayed_ledger_update) {
1915 *delayed_ledger_update += PAGE_SIZE;
1916 } else {
1917 /* more non-volatile bytes */
1918 ledger_credit(owner->ledger,
1919 ledger_idx_nonvolatile,
1920 PAGE_SIZE);
1921 if (do_footprint) {
1922 /* more footprint */
1923 ledger_credit(owner->ledger,
1924 task_ledgers.phys_footprint,
1925 PAGE_SIZE);
1926 }
1927 }
1928 } else if (owner &&
1929 (object->purgable == VM_PURGABLE_VOLATILE ||
1930 object->purgable == VM_PURGABLE_EMPTY)) {
1931 assert(!VM_PAGE_WIRED(mem));
1932 /* more volatile bytes */
1933 ledger_credit(owner->ledger,
1934 ledger_idx_volatile,
1935 PAGE_SIZE);
1936 }
1937
1938 if (object->purgable == VM_PURGABLE_VOLATILE) {
1939 if (VM_PAGE_WIRED(mem)) {
1940 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1941 } else {
1942 OSAddAtomic(+1, &vm_page_purgeable_count);
1943 }
1944 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1945 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
1946 /*
1947 * This page belongs to a purged VM object but hasn't
1948 * been purged (because it was "busy").
1949 * It's in the "throttled" queue and hence not
1950 * visible to vm_pageout_scan(). Move it to a pageable
1951 * queue, so that it can eventually be reclaimed, instead
1952 * of lingering in the "empty" object.
1953 */
1954 if (queues_lock_held == FALSE) {
1955 vm_page_lockspin_queues();
1956 }
1957 vm_page_deactivate(mem);
1958 if (queues_lock_held == FALSE) {
1959 vm_page_unlock_queues();
1960 }
1961 }
1962
1963 #if VM_OBJECT_TRACKING_OP_MODIFIED
1964 if (vm_object_tracking_inited &&
1965 object->internal &&
1966 object->resident_page_count == 0 &&
1967 object->pager == NULL &&
1968 object->shadow != NULL &&
1969 object->shadow->copy == object) {
1970 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1971 int numsaved = 0;
1972
1973 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1974 btlog_add_entry(vm_object_tracking_btlog,
1975 object,
1976 VM_OBJECT_TRACKING_OP_MODIFIED,
1977 bt,
1978 numsaved);
1979 }
1980 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1981 }
1982
1983 /*
1984 * vm_page_replace:
1985 *
1986 * Exactly like vm_page_insert, except that we first
1987 * remove any existing page at the given offset in object.
1988 *
1989 * The object must be locked.
1990 */
1991 void
1992 vm_page_replace(
1993 vm_page_t mem,
1994 vm_object_t object,
1995 vm_object_offset_t offset)
1996 {
1997 vm_page_bucket_t *bucket;
1998 vm_page_t found_m = VM_PAGE_NULL;
1999 lck_spin_t *bucket_lock;
2000 int hash_id;
2001
2002 #if 0
2003 /*
2004 * we don't hold the page queue lock
2005 * so this check isn't safe to make
2006 */
2007 VM_PAGE_CHECK(mem);
2008 #endif
2009 vm_object_lock_assert_exclusive(object);
2010 #if DEBUG || VM_PAGE_CHECK_BUCKETS
2011 if (mem->vmp_tabled || mem->vmp_object) {
2012 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
2013 "already in (obj=%p,off=0x%llx)",
2014 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
2015 }
2016 #endif
2017 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2018
2019 assert(!VM_PAGE_PAGEABLE(mem));
2020
2021 /*
2022 * Record the object/offset pair in this page
2023 */
2024 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
2025 mem->vmp_offset = offset;
2026
2027 /*
2028 * Insert it into the object/offset hash table,
2029 * replacing any page that might have been there.
2030 */
2031
2032 hash_id = vm_page_hash(object, offset);
2033 bucket = &vm_page_buckets[hash_id];
2034 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2035
2036 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
2037
2038 if (bucket->page_list) {
2039 vm_page_packed_t *mp = &bucket->page_list;
2040 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
2041
2042 do {
2043 /*
2044 * compare packed object pointers
2045 */
2046 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
2047 /*
2048 * Remove old page from hash list
2049 */
2050 *mp = m->vmp_next_m;
2051 m->vmp_hashed = FALSE;
2052 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2053
2054 found_m = m;
2055 break;
2056 }
2057 mp = &m->vmp_next_m;
2058 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
2059
2060 mem->vmp_next_m = bucket->page_list;
2061 } else {
2062 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2063 }
2064 /*
2065 * insert new page at head of hash list
2066 */
2067 bucket->page_list = VM_PAGE_PACK_PTR(mem);
2068 mem->vmp_hashed = TRUE;
2069
2070 lck_spin_unlock(bucket_lock);
2071
2072 if (found_m) {
2073 /*
2074 * there was already a page at the specified
2075 * offset for this object... remove it from
2076 * the object and free it back to the free list
2077 */
2078 vm_page_free_unlocked(found_m, FALSE);
2079 }
2080 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
2081 }
2082
2083 /*
2084 * vm_page_remove: [ internal use only ]
2085 *
2086 * Removes the given mem entry from the object/offset-page
2087 * table and the object page list.
2088 *
2089 * The object must be locked.
2090 */
2091
2092 void
2093 vm_page_remove(
2094 vm_page_t mem,
2095 boolean_t remove_from_hash)
2096 {
2097 vm_page_bucket_t *bucket;
2098 vm_page_t this;
2099 lck_spin_t *bucket_lock;
2100 int hash_id;
2101 task_t owner;
2102 vm_object_t m_object;
2103 int ledger_idx_volatile;
2104 int ledger_idx_nonvolatile;
2105 int ledger_idx_volatile_compressed;
2106 int ledger_idx_nonvolatile_compressed;
2107 int do_footprint;
2108
2109 m_object = VM_PAGE_OBJECT(mem);
2110
2111 vm_object_lock_assert_exclusive(m_object);
2112 assert(mem->vmp_tabled);
2113 assert(!mem->vmp_cleaning);
2114 assert(!mem->vmp_laundry);
2115
2116 if (VM_PAGE_PAGEABLE(mem)) {
2117 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2118 }
2119 #if 0
2120 /*
2121 * we don't hold the page queue lock
2122 * so this check isn't safe to make
2123 */
2124 VM_PAGE_CHECK(mem);
2125 #endif
2126 if (remove_from_hash == TRUE) {
2127 /*
2128 * Remove from the object/offset hash table
2129 */
2130 hash_id = vm_page_hash(m_object, mem->vmp_offset);
2131 bucket = &vm_page_buckets[hash_id];
2132 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2133
2134 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
2135
2136 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
2137 /* optimize for common case */
2138
2139 bucket->page_list = mem->vmp_next_m;
2140 } else {
2141 vm_page_packed_t *prev;
2142
2143 for (prev = &this->vmp_next_m;
2144 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
2145 prev = &this->vmp_next_m) {
2146 continue;
2147 }
2148 *prev = this->vmp_next_m;
2149 }
2150 #if MACH_PAGE_HASH_STATS
2151 bucket->cur_count--;
2152 #endif /* MACH_PAGE_HASH_STATS */
2153 mem->vmp_hashed = FALSE;
2154 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2155 lck_spin_unlock(bucket_lock);
2156 }
2157 /*
2158 * Now remove from the object's list of backed pages.
2159 */
2160
2161 vm_page_remove_internal(mem);
2162
2163 /*
2164 * And show that the object has one fewer resident
2165 * page.
2166 */
2167
2168 assert(m_object->resident_page_count > 0);
2169 m_object->resident_page_count--;
2170
2171 if (m_object->internal) {
2172 #if DEBUG
2173 assert(vm_page_internal_count);
2174 #endif /* DEBUG */
2175
2176 OSAddAtomic(-1, &vm_page_internal_count);
2177 } else {
2178 assert(vm_page_external_count);
2179 OSAddAtomic(-1, &vm_page_external_count);
2180
2181 if (mem->vmp_xpmapped) {
2182 assert(vm_page_xpmapped_external_count);
2183 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
2184 }
2185 }
2186 if (!m_object->internal &&
2187 m_object->cached_list.next &&
2188 m_object->cached_list.prev) {
2189 if (m_object->resident_page_count == 0) {
2190 vm_object_cache_remove(m_object);
2191 }
2192 }
2193
2194 if (VM_PAGE_WIRED(mem)) {
2195 assert(mem->vmp_wire_count > 0);
2196 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
2197 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
2198 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
2199 }
2200 assert(m_object->resident_page_count >=
2201 m_object->wired_page_count);
2202 if (mem->vmp_reusable) {
2203 assert(m_object->reusable_page_count > 0);
2204 m_object->reusable_page_count--;
2205 assert(m_object->reusable_page_count <=
2206 m_object->resident_page_count);
2207 mem->vmp_reusable = FALSE;
2208 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2209 vm_page_stats_reusable.reused_remove++;
2210 } else if (m_object->all_reusable) {
2211 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2212 vm_page_stats_reusable.reused_remove++;
2213 }
2214
2215 if (m_object->purgable == VM_PURGABLE_DENY &&
2216 !m_object->vo_ledger_tag) {
2217 owner = TASK_NULL;
2218 } else {
2219 owner = VM_OBJECT_OWNER(m_object);
2220 vm_object_ledger_tag_ledgers(m_object,
2221 &ledger_idx_volatile,
2222 &ledger_idx_nonvolatile,
2223 &ledger_idx_volatile_compressed,
2224 &ledger_idx_nonvolatile_compressed,
2225 &do_footprint);
2226 }
2227 if (owner &&
2228 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
2229 m_object->purgable == VM_PURGABLE_DENY ||
2230 VM_PAGE_WIRED(mem))) {
2231 /* less non-volatile bytes */
2232 ledger_debit(owner->ledger,
2233 ledger_idx_nonvolatile,
2234 PAGE_SIZE);
2235 if (do_footprint) {
2236 /* less footprint */
2237 ledger_debit(owner->ledger,
2238 task_ledgers.phys_footprint,
2239 PAGE_SIZE);
2240 }
2241 } else if (owner &&
2242 (m_object->purgable == VM_PURGABLE_VOLATILE ||
2243 m_object->purgable == VM_PURGABLE_EMPTY)) {
2244 assert(!VM_PAGE_WIRED(mem));
2245 /* less volatile bytes */
2246 ledger_debit(owner->ledger,
2247 ledger_idx_volatile,
2248 PAGE_SIZE);
2249 }
2250 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
2251 if (VM_PAGE_WIRED(mem)) {
2252 assert(vm_page_purgeable_wired_count > 0);
2253 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2254 } else {
2255 assert(vm_page_purgeable_count > 0);
2256 OSAddAtomic(-1, &vm_page_purgeable_count);
2257 }
2258 }
2259
2260 if (m_object->set_cache_attr == TRUE) {
2261 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
2262 }
2263
2264 mem->vmp_tabled = FALSE;
2265 mem->vmp_object = 0;
2266 mem->vmp_offset = (vm_object_offset_t) -1;
2267 }
2268
2269
2270 /*
2271 * vm_page_lookup:
2272 *
2273 * Returns the page associated with the object/offset
2274 * pair specified; if none is found, VM_PAGE_NULL is returned.
2275 *
2276 * The object must be locked. No side effects.
2277 */
2278
2279 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
2280
2281 #if DEBUG_VM_PAGE_LOOKUP
2282
2283 struct {
2284 uint64_t vpl_total;
2285 uint64_t vpl_empty_obj;
2286 uint64_t vpl_bucket_NULL;
2287 uint64_t vpl_hit_hint;
2288 uint64_t vpl_hit_hint_next;
2289 uint64_t vpl_hit_hint_prev;
2290 uint64_t vpl_fast;
2291 uint64_t vpl_slow;
2292 uint64_t vpl_hit;
2293 uint64_t vpl_miss;
2294
2295 uint64_t vpl_fast_elapsed;
2296 uint64_t vpl_slow_elapsed;
2297 } vm_page_lookup_stats __attribute__((aligned(8)));
2298
2299 #endif
2300
2301 #define KDP_VM_PAGE_WALK_MAX 1000
2302
2303 vm_page_t
2304 kdp_vm_page_lookup(
2305 vm_object_t object,
2306 vm_object_offset_t offset)
2307 {
2308 vm_page_t cur_page;
2309 int num_traversed = 0;
2310
2311 if (not_in_kdp) {
2312 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2313 }
2314
2315 vm_page_queue_iterate(&object->memq, cur_page, vmp_listq) {
2316 if (cur_page->vmp_offset == offset) {
2317 return cur_page;
2318 }
2319 num_traversed++;
2320
2321 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2322 return VM_PAGE_NULL;
2323 }
2324 }
2325
2326 return VM_PAGE_NULL;
2327 }
2328
2329 vm_page_t
2330 vm_page_lookup(
2331 vm_object_t object,
2332 vm_object_offset_t offset)
2333 {
2334 vm_page_t mem;
2335 vm_page_bucket_t *bucket;
2336 vm_page_queue_entry_t qe;
2337 lck_spin_t *bucket_lock = NULL;
2338 int hash_id;
2339 #if DEBUG_VM_PAGE_LOOKUP
2340 uint64_t start, elapsed;
2341
2342 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2343 #endif
2344 vm_object_lock_assert_held(object);
2345
2346 if (object->resident_page_count == 0) {
2347 #if DEBUG_VM_PAGE_LOOKUP
2348 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2349 #endif
2350 return VM_PAGE_NULL;
2351 }
2352
2353 mem = object->memq_hint;
2354
2355 if (mem != VM_PAGE_NULL) {
2356 assert(VM_PAGE_OBJECT(mem) == object);
2357
2358 if (mem->vmp_offset == offset) {
2359 #if DEBUG_VM_PAGE_LOOKUP
2360 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2361 #endif
2362 return mem;
2363 }
2364 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2365
2366 if (!vm_page_queue_end(&object->memq, qe)) {
2367 vm_page_t next_page;
2368
2369 next_page = (vm_page_t)((uintptr_t)qe);
2370 assert(VM_PAGE_OBJECT(next_page) == object);
2371
2372 if (next_page->vmp_offset == offset) {
2373 object->memq_hint = next_page; /* new hint */
2374 #if DEBUG_VM_PAGE_LOOKUP
2375 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2376 #endif
2377 return next_page;
2378 }
2379 }
2380 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2381
2382 if (!vm_page_queue_end(&object->memq, qe)) {
2383 vm_page_t prev_page;
2384
2385 prev_page = (vm_page_t)((uintptr_t)qe);
2386 assert(VM_PAGE_OBJECT(prev_page) == object);
2387
2388 if (prev_page->vmp_offset == offset) {
2389 object->memq_hint = prev_page; /* new hint */
2390 #if DEBUG_VM_PAGE_LOOKUP
2391 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2392 #endif
2393 return prev_page;
2394 }
2395 }
2396 }
2397 /*
2398 * Search the hash table for this object/offset pair
2399 */
2400 hash_id = vm_page_hash(object, offset);
2401 bucket = &vm_page_buckets[hash_id];
2402
2403 /*
2404 * since we hold the object lock, we are guaranteed that no
2405 * new pages can be inserted into this object... this in turn
2406 * guarantees that the page we're looking for can't exist
2407 * if the bucket it hashes to is currently NULL even when looked
2408 * at outside the scope of the hash bucket lock... this is a
2409 * really cheap optimization to avoid taking the lock
2410 */
2411 if (!bucket->page_list) {
2412 #if DEBUG_VM_PAGE_LOOKUP
2413 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2414 #endif
2415 return VM_PAGE_NULL;
2416 }
2417
2418 #if DEBUG_VM_PAGE_LOOKUP
2419 start = mach_absolute_time();
2420 #endif
2421 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2422 /*
2423 * on average, it's roughly 3 times faster to run a short memq list
2424 * than to take the spin lock and go through the hash list
2425 */
2426 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2427
2428 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2429 if (mem->vmp_offset == offset) {
2430 break;
2431 }
2432
2433 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
2434 }
2435 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2436 mem = NULL;
2437 }
2438 } else {
2439 vm_page_object_t packed_object;
2440
2441 packed_object = VM_PAGE_PACK_OBJECT(object);
2442
2443 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2444
2445 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
2446
2447 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2448 mem != VM_PAGE_NULL;
2449 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
2450 #if 0
2451 /*
2452 * we don't hold the page queue lock
2453 * so this check isn't safe to make
2454 */
2455 VM_PAGE_CHECK(mem);
2456 #endif
2457 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset)) {
2458 break;
2459 }
2460 }
2461 lck_spin_unlock(bucket_lock);
2462 }
2463
2464 #if DEBUG_VM_PAGE_LOOKUP
2465 elapsed = mach_absolute_time() - start;
2466
2467 if (bucket_lock) {
2468 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2469 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2470 } else {
2471 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2472 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2473 }
2474 if (mem != VM_PAGE_NULL) {
2475 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2476 } else {
2477 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2478 }
2479 #endif
2480 if (mem != VM_PAGE_NULL) {
2481 assert(VM_PAGE_OBJECT(mem) == object);
2482
2483 object->memq_hint = mem;
2484 }
2485 return mem;
2486 }
2487
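/*
 * A hedged usage sketch: callers must hold the object lock (a shared hold is
 * sufficient for a pure lookup) and must be prepared for VM_PAGE_NULL. The
 * helper name below is illustrative only.
 */
#if 0 /* illustrative only, not compiled */
static boolean_t
example_page_is_resident(vm_object_t object, vm_object_offset_t offset)
{
	boolean_t resident;

	vm_object_lock_shared(object);
	resident = (vm_page_lookup(object, offset) != VM_PAGE_NULL);
	vm_object_unlock(object);

	return resident;
}
#endif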
2488
2489 /*
2490 * vm_page_rename:
2491 *
2492 * Move the given memory entry from its
2493 * current object to the specified target object/offset.
2494 *
2495 * The object must be locked.
2496 */
2497 void
2498 vm_page_rename(
2499 vm_page_t mem,
2500 vm_object_t new_object,
2501 vm_object_offset_t new_offset)
2502 {
2503 boolean_t internal_to_external, external_to_internal;
2504 vm_tag_t tag;
2505 vm_object_t m_object;
2506
2507 m_object = VM_PAGE_OBJECT(mem);
2508
2509 assert(m_object != new_object);
2510 assert(m_object);
2511
2512 /*
2513 * Changes to mem->vmp_object require the page queues lock because
2514 * the pageout daemon uses that lock to get the object.
2515 */
2516 vm_page_lockspin_queues();
2517
2518 internal_to_external = FALSE;
2519 external_to_internal = FALSE;
2520
2521 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2522 /*
2523 * it's much easier to get the vm_page_pageable_xxx accounting correct
2524 * if we first move the page to the active queue... it's going to end
2525 * up there anyway, and we don't call vm_page_rename frequently enough
2526 * for this to matter.
2527 */
2528 vm_page_queues_remove(mem, FALSE);
2529 vm_page_activate(mem);
2530 }
2531 if (VM_PAGE_PAGEABLE(mem)) {
2532 if (m_object->internal && !new_object->internal) {
2533 internal_to_external = TRUE;
2534 }
2535 if (!m_object->internal && new_object->internal) {
2536 external_to_internal = TRUE;
2537 }
2538 }
2539
2540 tag = m_object->wire_tag;
2541 vm_page_remove(mem, TRUE);
2542 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2543
2544 if (internal_to_external) {
2545 vm_page_pageable_internal_count--;
2546 vm_page_pageable_external_count++;
2547 } else if (external_to_internal) {
2548 vm_page_pageable_external_count--;
2549 vm_page_pageable_internal_count++;
2550 }
2551
2552 vm_page_unlock_queues();
2553 }
2554
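/*
 * A hedged usage sketch: vm_page_rename() expects both the page's current
 * object and the destination object to be locked exclusively by the caller;
 * it takes the page queues lock itself. The function name is illustrative
 * only, and lock ordering between the two objects is the caller's problem.
 */
#if 0 /* illustrative only, not compiled */
static void
example_move_page(vm_page_t m, vm_object_t new_object, vm_object_offset_t new_offset)
{
	vm_object_t old_object = VM_PAGE_OBJECT(m);

	vm_object_lock(old_object);
	vm_object_lock(new_object);

	vm_page_rename(m, new_object, new_offset);

	vm_object_unlock(new_object);
	vm_object_unlock(old_object);
}
#endif
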
2555 /*
2556 * vm_page_init:
2557 *
2558 * Initialize the fields in a new page.
2559 * This takes a structure with random values and initializes it
2560 * so that it can be given to vm_page_release or vm_page_insert.
2561 */
2562 void
2563 vm_page_init(
2564 vm_page_t mem,
2565 ppnum_t phys_page,
2566 boolean_t lopage)
2567 {
2568 uint_t i;
2569 uintptr_t *p;
2570
2571 assert(phys_page);
2572
2573 #if DEBUG
2574 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2575 if (!(pmap_valid_page(phys_page))) {
2576 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2577 }
2578 }
2579 #endif /* DEBUG */
2580
2581 /*
2582 * Initialize the fields of the vm_page. If adding any new fields to vm_page,
2583 * try to use initial values which match 0. This minimizes the number of writes
2584 * needed for boot-time initialization.
2585 *
2586 * Kernel bzero() isn't an inline yet, so do it by hand for performance.
2587 */
2588 assert(VM_PAGE_NOT_ON_Q == 0);
2589 assert(sizeof(*mem) % sizeof(uintptr_t) == 0);
2590 for (p = (uintptr_t *)(void *)mem, i = sizeof(*mem) / sizeof(uintptr_t); i != 0; --i) {
2591 *p++ = 0;
2592 }
2593 mem->vmp_offset = (vm_object_offset_t)-1;
2594 mem->vmp_busy = TRUE;
2595 mem->vmp_lopage = lopage;
2596
2597 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2598 #if 0
2599 /*
2600 * we're leaving this turned off for now... currently pages
2601 * come off the free list and are either immediately dirtied/referenced
2602 * due to zero-fill or COW faults, or are used to read or write files...
2603 * in the file I/O case, the UPL mechanism takes care of clearing
2604 * the state of the HW ref/mod bits in a somewhat fragile way.
2605 * Since we may change the way this works in the future (to toughen it up),
2606 * I'm leaving this as a reminder of where these bits could get cleared
2607 */
2608
2609 /*
2610 * make sure both the h/w referenced and modified bits are
2611 * clear at this point... we are especially dependent on
2612 * not finding a 'stale' h/w modified in a number of spots
2613 * once this page goes back into use
2614 */
2615 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2616 #endif
2617 }
2618
2619 /*
2620 * vm_page_grab_fictitious:
2621 *
2622 * Remove a fictitious page from the free list.
2623 * Returns VM_PAGE_NULL if there are no free pages.
2624 */
2625 int c_vm_page_grab_fictitious = 0;
2626 int c_vm_page_grab_fictitious_failed = 0;
2627 int c_vm_page_release_fictitious = 0;
2628 int c_vm_page_more_fictitious = 0;
2629
2630 vm_page_t
2631 vm_page_grab_fictitious_common(
2632 ppnum_t phys_addr)
2633 {
2634 vm_page_t m;
2635
2636 if ((m = (vm_page_t)zget(vm_page_zone))) {
2637 vm_page_init(m, phys_addr, FALSE);
2638 m->vmp_fictitious = TRUE;
2639
2640 c_vm_page_grab_fictitious++;
2641 } else {
2642 c_vm_page_grab_fictitious_failed++;
2643 }
2644
2645 return m;
2646 }
2647
2648 vm_page_t
2649 vm_page_grab_fictitious(void)
2650 {
2651 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2652 }
2653
2654 int vm_guard_count;
2655
2656
2657 vm_page_t
2658 vm_page_grab_guard(void)
2659 {
2660 vm_page_t page;
2661 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2662 if (page) {
2663 OSAddAtomic(1, &vm_guard_count);
2664 }
2665 return page;
2666 }
2667
2668
2669 /*
2670 * vm_page_release_fictitious:
2671 *
2672 * Release a fictitious page to the zone pool
2673 */
2674 void
2675 vm_page_release_fictitious(
2676 vm_page_t m)
2677 {
2678 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2679 assert(m->vmp_fictitious);
2680 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2681 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2682
2683
2684 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
2685 OSAddAtomic(-1, &vm_guard_count);
2686 }
2687
2688 c_vm_page_release_fictitious++;
2689
2690 zfree(vm_page_zone, m);
2691 }
2692
2693 /*
2694 * vm_page_more_fictitious:
2695 *
2696 * Add more fictitious pages to the zone.
2697 * Allowed to block. This routine is tightly coupled
2698 * with the zones code, for several reasons:
2699 * 1. we need to carve some page structures out of physical
2700 * memory before zones work, so they _cannot_ come from
2701 * the zone_map.
2702 * 2. the zone needs to be collectable in order to prevent
2703 * growth without bound. These structures are used by
2704 * the device pager (by the hundreds and thousands), as
2705 * private pages for pageout, and as blocking pages for
2706 * pagein. Temporary bursts in demand should not result in
2707 * permanent allocation of a resource.
2708 * 3. To smooth allocation humps, we allocate single pages
2709 * with kernel_memory_allocate(), and cram them into the
2710 * zone.
2711 */
2712
2713 void
2714 vm_page_more_fictitious(void)
2715 {
2716 vm_offset_t addr;
2717 kern_return_t retval;
2718
2719 c_vm_page_more_fictitious++;
2720
2721 /*
2722 * Allocate a single page from the zone_map. Do not wait if no physical
2723 * pages are immediately available, and do not zero the space. We need
2724 * our own blocking lock here to prevent having multiple,
2725 * simultaneous requests from piling up on the zone_map lock. Exactly
2726 * one (of our) threads should be potentially waiting on the map lock.
2727 * If the winner is not vm-privileged, then the page allocation will fail,
2728 * and it will temporarily block here in the vm_page_wait().
2729 */
2730 lck_mtx_lock(&vm_page_alloc_lock);
2731 /*
2732 * If another thread allocated space, just bail out now.
2733 */
2734 if (zone_free_count(vm_page_zone) > 5) {
2735 /*
2736 * The number "5" is a small number that is larger than the
2737 * number of fictitious pages that any single caller will
2738 * attempt to allocate. Otherwise, a thread will attempt to
2739 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2740 * release all of the resources and locks already acquired,
2741 * and then call this routine. This routine finds the pages
2742 * that the caller released, so fails to allocate new space.
2743 * The process repeats infinitely. The largest known number
2744 * of fictitious pages required in this manner is 2. 5 is
2745 * simply a somewhat larger number.
2746 */
2747 lck_mtx_unlock(&vm_page_alloc_lock);
2748 return;
2749 }
2750
2751 retval = kernel_memory_allocate(zone_map,
2752 &addr, PAGE_SIZE, 0,
2753 KMA_KOBJECT | KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2754 if (retval != KERN_SUCCESS) {
2755 /*
2756 * No page was available. Drop the
2757 * lock to give another thread a chance at it, and
2758 * wait for the pageout daemon to make progress.
2759 */
2760 lck_mtx_unlock(&vm_page_alloc_lock);
2761 vm_page_wait(THREAD_UNINT);
2762 return;
2763 }
2764
2765 zcram(vm_page_zone, addr, PAGE_SIZE);
2766
2767 lck_mtx_unlock(&vm_page_alloc_lock);
2768 }
2769
2770
2771 /*
2772 * vm_pool_low():
2773 *
2774 * Return true if it is not likely that a non-vm_privileged thread
2775 * can get memory without blocking. Advisory only, since the
2776 * situation may change under us.
2777 */
2778 int
2779 vm_pool_low(void)
2780 {
2781 /* No locking, at worst we will fib. */
2782 return vm_page_free_count <= vm_page_free_reserved;
2783 }
2784
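/*
 * A hedged usage sketch: because vm_pool_low() is advisory, a non-privileged
 * path might use it only to decide whether to attempt opportunistic work; it
 * must still handle allocation failure either way. Illustrative only.
 */
#if 0 /* illustrative only, not compiled */
static void
example_optional_work(void)
{
	if (vm_pool_low()) {
		/* free pages look scarce: defer or skip optional allocations */
		return;
	}
	/* free pages appear plentiful: proceed with the opportunistic work */
}
#endif
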
2785 boolean_t vm_darkwake_mode = FALSE;
2786
2787 /*
2788 * vm_update_darkwake_mode():
2789 *
2790 * Tells the VM that the system is in / out of darkwake.
2791 *
2792 * Today, the VM only lowers/raises the background queue target
2793 * so as to favor consuming more/less background pages when
2794 * darkwake is ON/OFF.
2795 *
2796 * We might need to do more things in the future.
2797 */
2798
2799 void
2800 vm_update_darkwake_mode(boolean_t darkwake_mode)
2801 {
2802 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2803
2804 vm_page_lockspin_queues();
2805
2806 if (vm_darkwake_mode == darkwake_mode) {
2807 /*
2808 * No change.
2809 */
2810 vm_page_unlock_queues();
2811 return;
2812 }
2813
2814 vm_darkwake_mode = darkwake_mode;
2815
2816 if (vm_darkwake_mode == TRUE) {
2817 #if CONFIG_BACKGROUND_QUEUE
2818
2819 /* save background target to restore later */
2820 vm_page_background_target_snapshot = vm_page_background_target;
2821
2822 /* target is set to 0...no protection for background pages */
2823 vm_page_background_target = 0;
2824
2825 #endif /* CONFIG_BACKGROUND_QUEUE */
2826 } else if (vm_darkwake_mode == FALSE) {
2827 #if CONFIG_BACKGROUND_QUEUE
2828
2829 if (vm_page_background_target_snapshot) {
2830 vm_page_background_target = vm_page_background_target_snapshot;
2831 }
2832 #endif /* CONFIG_BACKGROUND_QUEUE */
2833 }
2834 vm_page_unlock_queues();
2835 }
2836
2837 #if CONFIG_BACKGROUND_QUEUE
2838
2839 void
2840 vm_page_update_background_state(vm_page_t mem)
2841 {
2842 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2843 return;
2844 }
2845
2846 if (mem->vmp_in_background == FALSE) {
2847 return;
2848 }
2849
2850 task_t my_task = current_task();
2851
2852 if (my_task) {
2853 if (task_get_darkwake_mode(my_task)) {
2854 return;
2855 }
2856 }
2857
2858 #if BACKGROUNDQ_BASED_ON_QOS
2859 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY) {
2860 return;
2861 }
2862 #else
2863 if (my_task) {
2864 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG)) {
2865 return;
2866 }
2867 }
2868 #endif
2869 vm_page_lockspin_queues();
2870
2871 mem->vmp_in_background = FALSE;
2872 vm_page_background_promoted_count++;
2873
2874 vm_page_remove_from_backgroundq(mem);
2875
2876 vm_page_unlock_queues();
2877 }
2878
2879
2880 void
2881 vm_page_assign_background_state(vm_page_t mem)
2882 {
2883 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2884 return;
2885 }
2886
2887 task_t my_task = current_task();
2888
2889 if (my_task) {
2890 if (task_get_darkwake_mode(my_task)) {
2891 mem->vmp_in_background = TRUE;
2892 return;
2893 }
2894 }
2895
2896 #if BACKGROUNDQ_BASED_ON_QOS
2897 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY) {
2898 mem->vmp_in_background = TRUE;
2899 } else {
2900 mem->vmp_in_background = FALSE;
2901 }
2902 #else
2903 if (my_task) {
2904 mem->vmp_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2905 }
2906 #endif
2907 }
2908
2909
2910 void
2911 vm_page_remove_from_backgroundq(
2912 vm_page_t mem)
2913 {
2914 vm_object_t m_object;
2915
2916 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2917
2918 if (mem->vmp_on_backgroundq) {
2919 vm_page_queue_remove(&vm_page_queue_background, mem, vmp_backgroundq);
2920
2921 mem->vmp_backgroundq.next = 0;
2922 mem->vmp_backgroundq.prev = 0;
2923 mem->vmp_on_backgroundq = FALSE;
2924
2925 vm_page_background_count--;
2926
2927 m_object = VM_PAGE_OBJECT(mem);
2928
2929 if (m_object->internal) {
2930 vm_page_background_internal_count--;
2931 } else {
2932 vm_page_background_external_count--;
2933 }
2934 } else {
2935 assert(VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.next) == (uintptr_t)NULL &&
2936 VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.prev) == (uintptr_t)NULL);
2937 }
2938 }
2939
2940
2941 void
2942 vm_page_add_to_backgroundq(
2943 vm_page_t mem,
2944 boolean_t first)
2945 {
2946 vm_object_t m_object;
2947
2948 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2949
2950 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2951 return;
2952 }
2953
2954 if (mem->vmp_on_backgroundq == FALSE) {
2955 m_object = VM_PAGE_OBJECT(mem);
2956
2957 if (vm_page_background_exclude_external && !m_object->internal) {
2958 return;
2959 }
2960
2961 if (first == TRUE) {
2962 vm_page_queue_enter_first(&vm_page_queue_background, mem, vmp_backgroundq);
2963 } else {
2964 vm_page_queue_enter(&vm_page_queue_background, mem, vmp_backgroundq);
2965 }
2966 mem->vmp_on_backgroundq = TRUE;
2967
2968 vm_page_background_count++;
2969
2970 if (m_object->internal) {
2971 vm_page_background_internal_count++;
2972 } else {
2973 vm_page_background_external_count++;
2974 }
2975 }
2976 }
2977
2978 #endif /* CONFIG_BACKGROUND_QUEUE */
2979
2980 /*
2981 * This can be switched to FALSE to help debug drivers
2982 * that are having problems with memory > 4G.
2983 */
2984 boolean_t vm_himemory_mode = TRUE;
2985
2986 /*
2987 * this interface exists to support hardware controllers
2988 * incapable of generating DMAs with more than 32 bits
2989 * of address on platforms with physical memory > 4G...
2990 */
2991 unsigned int vm_lopages_allocated_q = 0;
2992 unsigned int vm_lopages_allocated_cpm_success = 0;
2993 unsigned int vm_lopages_allocated_cpm_failed = 0;
2994 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2995
2996 vm_page_t
2997 vm_page_grablo(void)
2998 {
2999 vm_page_t mem;
3000
3001 if (vm_lopage_needed == FALSE) {
3002 return vm_page_grab();
3003 }
3004
3005 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3006
3007 if (!vm_page_queue_empty(&vm_lopage_queue_free)) {
3008 vm_page_queue_remove_first(&vm_lopage_queue_free, mem, vmp_pageq);
3009 assert(vm_lopage_free_count);
3010 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
3011 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3012
3013 vm_lopage_free_count--;
3014 vm_lopages_allocated_q++;
3015
3016 if (vm_lopage_free_count < vm_lopage_lowater) {
3017 vm_lopage_refill = TRUE;
3018 }
3019
3020 lck_mtx_unlock(&vm_page_queue_free_lock);
3021
3022 #if CONFIG_BACKGROUND_QUEUE
3023 vm_page_assign_background_state(mem);
3024 #endif
3025 } else {
3026 lck_mtx_unlock(&vm_page_queue_free_lock);
3027
3028 if (cpm_allocate(PAGE_SIZE, &mem, atop(PPNUM_MAX), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
3029 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3030 vm_lopages_allocated_cpm_failed++;
3031 lck_mtx_unlock(&vm_page_queue_free_lock);
3032
3033 return VM_PAGE_NULL;
3034 }
3035 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3036
3037 mem->vmp_busy = TRUE;
3038
3039 vm_page_lockspin_queues();
3040
3041 mem->vmp_gobbled = FALSE;
3042 vm_page_gobble_count--;
3043 vm_page_wire_count--;
3044
3045 vm_lopages_allocated_cpm_success++;
3046 vm_page_unlock_queues();
3047 }
3048 assert(mem->vmp_busy);
3049 assert(!mem->vmp_pmapped);
3050 assert(!mem->vmp_wpmapped);
3051 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3052
3053 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3054
3055 disable_preemption();
3056 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3057 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
3058 enable_preemption();
3059
3060 return mem;
3061 }
3062
3063
3064 /*
3065 * vm_page_grab:
3066 *
3067 * first try to grab a page from the per-cpu free list...
3068 * this must be done while pre-emption is disabled... if
3069 * a page is available, we're done...
3070 * if no page is available, grab the vm_page_queue_free_lock
3071 * and see if current number of free pages would allow us
3072 * to grab at least 1... if not, return VM_PAGE_NULL as before...
3073 * if there are pages available, disable preemption and
3074 * recheck the state of the per-cpu free list... we could
3075 * have been preempted and moved to a different cpu, or
3076 * some other thread could have re-filled it... if still
3077 * empty, figure out how many pages we can steal from the
3078 * global free queue and move to the per-cpu queue...
3079 * return one of these pages when done... only wake up the
3080 * pageout_scan thread if we moved pages from the global
3081 * list... no need for the wakeup if we've satisfied the
3082 * request from the per-cpu queue.
3083 */
3084
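/*
 * A hedged sketch of the caller pattern this allocator supports, as seen
 * elsewhere in the VM code: grab a page and, if none is available to this
 * thread, block in VM_PAGE_WAIT() and retry rather than spinning. The
 * function name is illustrative only.
 */
#if 0 /* illustrative only, not compiled */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		VM_PAGE_WAIT();         /* sleep until the pageout daemon frees pages */
	}
	/* m is returned busy, not on any queue, and not yet in any object */
	return m;
}
#endif
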
3085 #if CONFIG_SECLUDED_MEMORY
3086 vm_page_t vm_page_grab_secluded(void);
3087 #endif /* CONFIG_SECLUDED_MEMORY */
3088
3089 static inline void
3090 vm_page_grab_diags(void);
3091
3092 vm_page_t
3093 vm_page_grab(void)
3094 {
3095 return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE);
3096 }
3097
3098 #if HIBERNATION
3099 boolean_t hibernate_rebuild_needed = FALSE;
3100 #endif /* HIBERNATION */
3101
3102 vm_page_t
3103 vm_page_grab_options(
3104 int grab_options)
3105 {
3106 vm_page_t mem;
3107
3108 disable_preemption();
3109
3110 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
3111 return_page_from_cpu_list:
3112 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
3113
3114 #if HIBERNATION
3115 if (hibernate_rebuild_needed) {
3116 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3117 }
3118 #endif /* HIBERNATION */
3119
3120 vm_page_grab_diags();
3121 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3122 PROCESSOR_DATA(current_processor(), free_pages) = mem->vmp_snext;
3123 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3124
3125 enable_preemption();
3126 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3127 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3128
3129 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3130 assert(mem->vmp_tabled == FALSE);
3131 assert(mem->vmp_object == 0);
3132 assert(!mem->vmp_laundry);
3133 ASSERT_PMAP_FREE(mem);
3134 assert(mem->vmp_busy);
3135 assert(!mem->vmp_pmapped);
3136 assert(!mem->vmp_wpmapped);
3137 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3138
3139 #if CONFIG_BACKGROUND_QUEUE
3140 vm_page_assign_background_state(mem);
3141 #endif
3142 return mem;
3143 }
3144 enable_preemption();
3145
3146
3147 /*
3148 * Optionally produce warnings if the wire or gobble
3149 * counts exceed some threshold.
3150 */
3151 #if VM_PAGE_WIRE_COUNT_WARNING
3152 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
3153 printf("mk: vm_page_grab(): high wired page count of %d\n",
3154 vm_page_wire_count);
3155 }
3156 #endif
3157 #if VM_PAGE_GOBBLE_COUNT_WARNING
3158 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
3159 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
3160 vm_page_gobble_count);
3161 }
3162 #endif
3163
3164 /*
3165 * If free count is low and we have delayed pages from early boot,
3166 * get one of those instead.
3167 */
3168 if (__improbable(vm_delayed_count > 0 &&
3169 vm_page_free_count <= vm_page_free_target &&
3170 (mem = vm_get_delayed_page(grab_options)) != NULL)) {
3171 return mem;
3172 }
3173
3174 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3175
3176 /*
3177 * Only let privileged threads (involved in pageout)
3178 * dip into the reserved pool.
3179 */
3180 if ((vm_page_free_count < vm_page_free_reserved) &&
3181 !(current_thread()->options & TH_OPT_VMPRIV)) {
3182 /* no page for us in the free queue... */
3183 lck_mtx_unlock(&vm_page_queue_free_lock);
3184 mem = VM_PAGE_NULL;
3185
3186 #if CONFIG_SECLUDED_MEMORY
3187 /* ... but can we try to grab from the secluded queue? */
3188 if (vm_page_secluded_count > 0 &&
3189 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
3190 task_can_use_secluded_mem(current_task(), TRUE))) {
3191 mem = vm_page_grab_secluded();
3192 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
3193 vm_page_secluded.grab_for_iokit++;
3194 if (mem) {
3195 vm_page_secluded.grab_for_iokit_success++;
3196 }
3197 }
3198 if (mem) {
3199 VM_CHECK_MEMORYSTATUS;
3200
3201 disable_preemption();
3202 vm_page_grab_diags();
3203 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3204 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3205 enable_preemption();
3206
3207 return mem;
3208 }
3209 }
3210 #else /* CONFIG_SECLUDED_MEMORY */
3211 (void) grab_options;
3212 #endif /* CONFIG_SECLUDED_MEMORY */
3213 } else {
3214 vm_page_t head;
3215 vm_page_t tail;
3216 unsigned int pages_to_steal;
3217 unsigned int color;
3218 unsigned int clump_end, sub_count;
3219
3220 while (vm_page_free_count == 0) {
3221 lck_mtx_unlock(&vm_page_queue_free_lock);
3222 /*
3223 * must be a privileged thread to be
3224 * in this state since a non-privileged
3225 * thread would have bailed if we were
3226 * under the vm_page_free_reserved mark
3227 */
3228 VM_PAGE_WAIT();
3229 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3230 }
3231
3232 disable_preemption();
3233
3234 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
3235 lck_mtx_unlock(&vm_page_queue_free_lock);
3236
3237 /*
3238 * we got preempted and moved to another processor
3239 * or we got preempted and someone else ran and filled the cache
3240 */
3241 goto return_page_from_cpu_list;
3242 }
3243 if (vm_page_free_count <= vm_page_free_reserved) {
3244 pages_to_steal = 1;
3245 } else {
3246 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved)) {
3247 pages_to_steal = vm_free_magazine_refill_limit;
3248 } else {
3249 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
3250 }
3251 }
3252 color = PROCESSOR_DATA(current_processor(), start_color);
3253 head = tail = NULL;
3254
3255 vm_page_free_count -= pages_to_steal;
3256 clump_end = sub_count = 0;
3257
3258 while (pages_to_steal--) {
3259 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead)) {
3260 color = (color + 1) & vm_color_mask;
3261 }
3262 #if defined(__x86_64__)
3263 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
3264 mem, clump_end);
3265 #else
3266 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
3267 mem, vmp_pageq);
3268 #endif
3269
3270 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
3271
3272 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3273
3274 #if defined(__arm__) || defined(__arm64__)
3275 color = (color + 1) & vm_color_mask;
3276 #else
3277
3278 #if DEVELOPMENT || DEBUG
3279
3280 sub_count++;
3281 if (clump_end) {
3282 vm_clump_update_stats(sub_count);
3283 sub_count = 0;
3284 color = (color + 1) & vm_color_mask;
3285 }
3286 #else
3287 if (clump_end) {
3288 color = (color + 1) & vm_color_mask;
3289 }
3290
3291 #endif /* if DEVELOPMENT || DEBUG */
3292
3293 #endif /* if defined(__arm__) || defined(__arm64__) */
3294
3295 if (head == NULL) {
3296 head = mem;
3297 } else {
3298 tail->vmp_snext = mem;
3299 }
3300 tail = mem;
3301
3302 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3303 assert(mem->vmp_tabled == FALSE);
3304 assert(mem->vmp_object == 0);
3305 assert(!mem->vmp_laundry);
3306
3307 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
3308
3309 ASSERT_PMAP_FREE(mem);
3310 assert(mem->vmp_busy);
3311 assert(!mem->vmp_pmapped);
3312 assert(!mem->vmp_wpmapped);
3313 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3314 }
3315 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
3316 vm_clump_update_stats(sub_count);
3317 #endif
3318 lck_mtx_unlock(&vm_page_queue_free_lock);
3319
3320 #if HIBERNATION
3321 if (hibernate_rebuild_needed) {
3322 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3323 }
3324 #endif /* HIBERNATION */
3325 PROCESSOR_DATA(current_processor(), free_pages) = head->vmp_snext;
3326 PROCESSOR_DATA(current_processor(), start_color) = color;
3327
3328 /*
3329 * satisfy this request
3330 */
3331 vm_page_grab_diags();
3332 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3333 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3334 mem = head;
3335 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
3336
3337 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3338 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3339
3340 enable_preemption();
3341 }
3342 /*
3343 * Decide if we should poke the pageout daemon.
3344 * We do this if the free count is less than the low
3345 * water mark. VM Pageout Scan will keep running till
3346 * the free_count > free_target (& hence above free_min).
3347 * This wakeup is to catch the possibility of the counts
3348 * dropping between VM Pageout Scan parking and this check.
3349 *
3350 * We don't have the counts locked ... if they change a little,
3351 * it doesn't really matter.
3352 */
3353 if (vm_page_free_count < vm_page_free_min) {
3354 lck_mtx_lock(&vm_page_queue_free_lock);
3355 if (vm_pageout_running == FALSE) {
3356 lck_mtx_unlock(&vm_page_queue_free_lock);
3357 thread_wakeup((event_t) &vm_page_free_wanted);
3358 } else {
3359 lck_mtx_unlock(&vm_page_queue_free_lock);
3360 }
3361 }
3362
3363 VM_CHECK_MEMORYSTATUS;
3364
3365 if (mem) {
3366 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3367
3368 #if CONFIG_BACKGROUND_QUEUE
3369 vm_page_assign_background_state(mem);
3370 #endif
3371 }
3372 return mem;
3373 }
3374
3375 #if CONFIG_SECLUDED_MEMORY
3376 vm_page_t
3377 vm_page_grab_secluded(void)
3378 {
3379 vm_page_t mem;
3380 vm_object_t object;
3381 int refmod_state;
3382
3383 if (vm_page_secluded_count == 0) {
3384 /* no secluded pages to grab... */
3385 return VM_PAGE_NULL;
3386 }
3387
3388 /* secluded queue is protected by the VM page queue lock */
3389 vm_page_lock_queues();
3390
3391 if (vm_page_secluded_count == 0) {
3392 /* no secluded pages to grab... */
3393 vm_page_unlock_queues();
3394 return VM_PAGE_NULL;
3395 }
3396
3397 #if 00
3398 /* can we grab from the secluded queue? */
3399 if (vm_page_secluded_count > vm_page_secluded_target ||
3400 (vm_page_secluded_count > 0 &&
3401 task_can_use_secluded_mem(current_task(), TRUE))) {
3402 /* OK */
3403 } else {
3404 /* can't grab from secluded queue... */
3405 vm_page_unlock_queues();
3406 return VM_PAGE_NULL;
3407 }
3408 #endif
3409
3410 /* we can grab a page from secluded queue! */
3411 assert((vm_page_secluded_count_free +
3412 vm_page_secluded_count_inuse) ==
3413 vm_page_secluded_count);
3414 if (current_task()->task_can_use_secluded_mem) {
3415 assert(num_tasks_can_use_secluded_mem > 0);
3416 }
3417 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3418 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3419 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3420 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3421 vm_page_queues_remove(mem, TRUE);
3422
3423 object = VM_PAGE_OBJECT(mem);
3424
3425 assert(!mem->vmp_fictitious);
3426 assert(!VM_PAGE_WIRED(mem));
3427 if (object == VM_OBJECT_NULL) {
3428 /* free for grab! */
3429 vm_page_unlock_queues();
3430 vm_page_secluded.grab_success_free++;
3431
3432 assert(mem->vmp_busy);
3433 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3434 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3435 assert(mem->vmp_pageq.next == 0);
3436 assert(mem->vmp_pageq.prev == 0);
3437 assert(mem->vmp_listq.next == 0);
3438 assert(mem->vmp_listq.prev == 0);
3439 #if CONFIG_BACKGROUND_QUEUE
3440 assert(mem->vmp_on_backgroundq == 0);
3441 assert(mem->vmp_backgroundq.next == 0);
3442 assert(mem->vmp_backgroundq.prev == 0);
3443 #endif /* CONFIG_BACKGROUND_QUEUE */
3444 return mem;
3445 }
3446
3447 assert(!object->internal);
3448 // vm_page_pageable_external_count--;
3449
3450 if (!vm_object_lock_try(object)) {
3451 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
3452 vm_page_secluded.grab_failure_locked++;
3453 reactivate_secluded_page:
3454 vm_page_activate(mem);
3455 vm_page_unlock_queues();
3456 return VM_PAGE_NULL;
3457 }
3458 if (mem->vmp_busy ||
3459 mem->vmp_cleaning ||
3460 mem->vmp_laundry) {
3461 /* can't steal page in this state... */
3462 vm_object_unlock(object);
3463 vm_page_secluded.grab_failure_state++;
3464 goto reactivate_secluded_page;
3465 }
3466
3467 mem->vmp_busy = TRUE;
3468 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3469 if (refmod_state & VM_MEM_REFERENCED) {
3470 mem->vmp_reference = TRUE;
3471 }
3472 if (refmod_state & VM_MEM_MODIFIED) {
3473 SET_PAGE_DIRTY(mem, FALSE);
3474 }
3475 if (mem->vmp_dirty || mem->vmp_precious) {
3476 /* can't grab a dirty page; re-activate */
3477 // printf("SECLUDED: dirty page %p\n", mem);
3478 PAGE_WAKEUP_DONE(mem);
3479 vm_page_secluded.grab_failure_dirty++;
3480 vm_object_unlock(object);
3481 goto reactivate_secluded_page;
3482 }
3483 if (mem->vmp_reference) {
3484 /* it's been used but we do need to grab a page... */
3485 }
3486
3487 vm_page_unlock_queues();
3488
3489 /* finish what vm_page_free() would have done... */
3490 vm_page_free_prepare_object(mem, TRUE);
3491 vm_object_unlock(object);
3492 object = VM_OBJECT_NULL;
3493 if (vm_page_free_verify) {
3494 ASSERT_PMAP_FREE(mem);
3495 }
3496 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3497 vm_page_secluded.grab_success_other++;
3498
3499 assert(mem->vmp_busy);
3500 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3501 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3502 assert(mem->vmp_pageq.next == 0);
3503 assert(mem->vmp_pageq.prev == 0);
3504 assert(mem->vmp_listq.next == 0);
3505 assert(mem->vmp_listq.prev == 0);
3506 #if CONFIG_BACKGROUND_QUEUE
3507 assert(mem->vmp_on_backgroundq == 0);
3508 assert(mem->vmp_backgroundq.next == 0);
3509 assert(mem->vmp_backgroundq.prev == 0);
3510 #endif /* CONFIG_BACKGROUND_QUEUE */
3511
3512 return mem;
3513 }
3514
3515 uint64_t
3516 vm_page_secluded_drain(void)
3517 {
3518 vm_page_t local_freeq;
3519 int local_freed;
3520 uint64_t num_reclaimed;
3521 unsigned int saved_secluded_count, saved_secluded_target;
3522
3523 num_reclaimed = 0;
3524 local_freeq = NULL;
3525 local_freed = 0;
3526
3527 vm_page_lock_queues();
3528
3529 saved_secluded_count = vm_page_secluded_count;
3530 saved_secluded_target = vm_page_secluded_target;
3531 vm_page_secluded_target = 0;
3532 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3533 while (vm_page_secluded_count) {
3534 vm_page_t secluded_page;
3535
3536 assert((vm_page_secluded_count_free +
3537 vm_page_secluded_count_inuse) ==
3538 vm_page_secluded_count);
3539 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3540 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3541
3542 vm_page_queues_remove(secluded_page, FALSE);
3543 assert(!secluded_page->vmp_fictitious);
3544 assert(!VM_PAGE_WIRED(secluded_page));
3545
3546 if (secluded_page->vmp_object == 0) {
3547 /* transfer to free queue */
3548 assert(secluded_page->vmp_busy);
3549 secluded_page->vmp_snext = local_freeq;
3550 local_freeq = secluded_page;
3551 local_freed += 1;
3552 } else {
3553 /* transfer to head of active queue */
3554 vm_page_enqueue_active(secluded_page, FALSE);
3555 secluded_page = VM_PAGE_NULL;
3556 }
3557 num_reclaimed++;
3558 }
3559 vm_page_secluded_target = saved_secluded_target;
3560 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3561
3562 // printf("FBDP %s:%d secluded_count %d->%d, target %d, reclaimed %lld\n", __FUNCTION__, __LINE__, saved_secluded_count, vm_page_secluded_count, vm_page_secluded_target, num_reclaimed);
3563
3564 vm_page_unlock_queues();
3565
3566 if (local_freed) {
3567 vm_page_free_list(local_freeq, TRUE);
3568 local_freeq = NULL;
3569 local_freed = 0;
3570 }
3571
3572 return num_reclaimed;
3573 }
3574 #endif /* CONFIG_SECLUDED_MEMORY */
3575
3576
3577 static inline void
3578 vm_page_grab_diags()
3579 {
3580 #if DEVELOPMENT || DEBUG
3581 task_t task = current_task();
3582 if (task == NULL) {
3583 return;
3584 }
3585
3586 ledger_credit(task->ledger, task_ledgers.pages_grabbed, 1);
3587 #endif /* DEVELOPMENT || DEBUG */
3588 }
3589
3590 /*
3591 * vm_page_release:
3592 *
3593 * Return a page to the free list.
3594 */
3595
3596 void
3597 vm_page_release(
3598 vm_page_t mem,
3599 boolean_t page_queues_locked)
3600 {
3601 unsigned int color;
3602 int need_wakeup = 0;
3603 int need_priv_wakeup = 0;
3604 #if CONFIG_SECLUDED_MEMORY
3605 int need_secluded_wakeup = 0;
3606 #endif /* CONFIG_SECLUDED_MEMORY */
3607 event_t wakeup_event = NULL;
3608
3609 if (page_queues_locked) {
3610 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3611 } else {
3612 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3613 }
3614
3615 assert(!mem->vmp_private && !mem->vmp_fictitious);
3616 if (vm_page_free_verify) {
3617 ASSERT_PMAP_FREE(mem);
3618 }
3619 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3620
3621 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3622
3623 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3624
3625 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3626 assert(mem->vmp_busy);
3627 assert(!mem->vmp_laundry);
3628 assert(mem->vmp_object == 0);
3629 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3630 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3631 #if CONFIG_BACKGROUND_QUEUE
3632 assert(mem->vmp_backgroundq.next == 0 &&
3633 mem->vmp_backgroundq.prev == 0 &&
3634 mem->vmp_on_backgroundq == FALSE);
3635 #endif
3636 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3637 vm_lopage_free_count < vm_lopage_free_limit &&
3638 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3639 /*
3640 * this exists to support hardware controllers
3641 * incapable of generating DMAs with more than 32 bits
3642 * of address on platforms with physical memory > 4G...
3643 */
3644 vm_page_queue_enter_first(&vm_lopage_queue_free, mem, vmp_pageq);
3645 vm_lopage_free_count++;
3646
3647 if (vm_lopage_free_count >= vm_lopage_free_limit) {
3648 vm_lopage_refill = FALSE;
3649 }
3650
3651 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3652 mem->vmp_lopage = TRUE;
3653 #if CONFIG_SECLUDED_MEMORY
3654 } else if (vm_page_free_count > vm_page_free_reserved &&
3655 vm_page_secluded_count < vm_page_secluded_target &&
3656 num_tasks_can_use_secluded_mem == 0) {
3657 /*
3658 * XXX FBDP TODO: also avoid refilling secluded queue
3659 * when some IOKit objects are already grabbing from it...
3660 */
3661 if (!page_queues_locked) {
3662 if (!vm_page_trylock_queues()) {
3663 /* take locks in right order */
3664 lck_mtx_unlock(&vm_page_queue_free_lock);
3665 vm_page_lock_queues();
3666 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3667 }
3668 }
3669 mem->vmp_lopage = FALSE;
3670 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3671 vm_page_queue_enter_first(&vm_page_queue_secluded, mem, vmp_pageq);
3672 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3673 vm_page_secluded_count++;
3674 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3675 vm_page_secluded_count_free++;
3676 if (!page_queues_locked) {
3677 vm_page_unlock_queues();
3678 }
3679 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3680 if (vm_page_free_wanted_secluded > 0) {
3681 vm_page_free_wanted_secluded--;
3682 need_secluded_wakeup = 1;
3683 }
3684 #endif /* CONFIG_SECLUDED_MEMORY */
3685 } else {
3686 mem->vmp_lopage = FALSE;
3687 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3688
3689 color = VM_PAGE_GET_COLOR(mem);
3690 #if defined(__x86_64__)
3691 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
3692 #else
3693 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
3694 #endif
3695 vm_page_free_count++;
3696 /*
3697 * Check if we should wake up someone waiting for a page.
3698 * But don't bother waking them unless they can allocate.
3699 *
3700 * We wake up only one thread, to prevent starvation.
3701 * Because the scheduling system handles wait queues FIFO,
3702 * if we wake up all waiting threads, one greedy thread
3703 * can starve multiple niceguy threads. When the threads
3704 * all wake up, the greedy thread runs first, grabs the page,
3705 * and waits for another page. It will be the first to run
3706 * when the next page is freed.
3707 *
3708 * However, there is a slight danger here.
3709 * The thread we wake might not use the free page.
3710 * Then the other threads could wait indefinitely
3711 * while the page goes unused. To forestall this,
3712 * the pageout daemon will keep making free pages
3713 * as long as vm_page_free_wanted is non-zero.
3714 */
3715
3716 assert(vm_page_free_count > 0);
3717 if (vm_page_free_wanted_privileged > 0) {
3718 vm_page_free_wanted_privileged--;
3719 need_priv_wakeup = 1;
3720 #if CONFIG_SECLUDED_MEMORY
3721 } else if (vm_page_free_wanted_secluded > 0 &&
3722 vm_page_free_count > vm_page_free_reserved) {
3723 vm_page_free_wanted_secluded--;
3724 need_secluded_wakeup = 1;
3725 #endif /* CONFIG_SECLUDED_MEMORY */
3726 } else if (vm_page_free_wanted > 0 &&
3727 vm_page_free_count > vm_page_free_reserved) {
3728 vm_page_free_wanted--;
3729 need_wakeup = 1;
3730 }
3731 }
3732 vm_pageout_vminfo.vm_page_pages_freed++;
3733
3734 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3735
3736 lck_mtx_unlock(&vm_page_queue_free_lock);
3737
3738 if (need_priv_wakeup) {
3739 wakeup_event = &vm_page_free_wanted_privileged;
3740 }
3741 #if CONFIG_SECLUDED_MEMORY
3742 else if (need_secluded_wakeup) {
3743 wakeup_event = &vm_page_free_wanted_secluded;
3744 }
3745 #endif /* CONFIG_SECLUDED_MEMORY */
3746 else if (need_wakeup) {
3747 wakeup_event = &vm_page_free_count;
3748 }
3749
3750 if (wakeup_event) {
3751 if (vps_dynamic_priority_enabled == TRUE) {
3752 thread_t thread_woken = NULL;
3753 wakeup_one_with_inheritor((event_t) wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
3754 thread_deallocate(thread_woken);
3755 } else {
3756 thread_wakeup_one((event_t) wakeup_event);
3757 }
3758 }
3759
3760 VM_CHECK_MEMORYSTATUS;
3761 }
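/*
 * Illustrative sketch (not part of the build): callers hand
 * vm_page_release() a busy page that is already off every paging queue
 * and no longer attached to an object, matching the assertions above.
 * The "mem" name below is hypothetical.
 *
 *	assert(mem->vmp_busy);
 *	assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
 *	assert(mem->vmp_object == 0);
 *	vm_page_release(mem, FALSE);	(page queues lock not held)
 *
 * Pass page_queues_locked == TRUE only when vm_page_queue_lock is
 * already owned, as vm_page_free() does on its queue-locked path.
 */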
3762
3763 /*
3764 * This version of vm_page_release() is used only at startup
3765 * when we are single-threaded and pages are being released
3766 * for the first time. Hence, no locking is needed and unnecessary checks are skipped.
3767 * Note: VM_CHECK_MEMORYSTATUS is invoked by the caller.
3768 */
3769 void
3770 vm_page_release_startup(
3771 vm_page_t mem)
3772 {
3773 vm_page_queue_t queue_free;
3774
3775 if (vm_lopage_free_count < vm_lopage_free_limit &&
3776 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3777 mem->vmp_lopage = TRUE;
3778 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3779 vm_lopage_free_count++;
3780 queue_free = &vm_lopage_queue_free;
3781 #if CONFIG_SECLUDED_MEMORY
3782 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3783 mem->vmp_lopage = FALSE;
3784 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3785 vm_page_secluded_count++;
3786 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3787 vm_page_secluded_count_free++;
3788 queue_free = &vm_page_queue_secluded;
3789 #endif /* CONFIG_SECLUDED_MEMORY */
3790 } else {
3791 mem->vmp_lopage = FALSE;
3792 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3793 vm_page_free_count++;
3794 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3795 }
3796 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
3797 #if defined(__x86_64__)
3798 vm_page_queue_enter_clump(queue_free, mem);
3799 #else
3800 vm_page_queue_enter(queue_free, mem, vmp_pageq);
3801 #endif
3802 } else {
3803 vm_page_queue_enter_first(queue_free, mem, vmp_pageq);
3804 }
3805 }
3806
3807 /*
3808 * vm_page_wait:
3809 *
3810 * Wait for a page to become available.
3811 * If there are plenty of free pages, then we don't sleep.
3812 *
3813 * Returns:
3814 * TRUE: There may be another page, try again
3815 * FALSE: We were interrupted out of our wait, don't try again
3816 */
3817
3818 boolean_t
3819 vm_page_wait(
3820 int interruptible )
3821 {
3822 /*
3823 * We can't use vm_page_free_reserved to make this
3824 * determination. Consider: some thread might
3825 * need to allocate two pages. The first allocation
3826 * succeeds, the second fails. After the first page is freed,
3827 * a call to vm_page_wait must really block.
3828 */
3829 kern_return_t wait_result;
3830 int need_wakeup = 0;
3831 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3832 event_t wait_event = NULL;
3833
3834 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3835
3836 if (is_privileged && vm_page_free_count) {
3837 lck_mtx_unlock(&vm_page_queue_free_lock);
3838 return TRUE;
3839 }
3840
3841 if (vm_page_free_count >= vm_page_free_target) {
3842 lck_mtx_unlock(&vm_page_queue_free_lock);
3843 return TRUE;
3844 }
3845
3846 if (is_privileged) {
3847 if (vm_page_free_wanted_privileged++ == 0) {
3848 need_wakeup = 1;
3849 }
3850 wait_event = (event_t)&vm_page_free_wanted_privileged;
3851 #if CONFIG_SECLUDED_MEMORY
3852 } else if (secluded_for_apps &&
3853 task_can_use_secluded_mem(current_task(), FALSE)) {
3854 #if 00
3855 /* XXX FBDP: need pageq lock for this... */
3856 /* XXX FBDP: might wait even if pages available, */
3857 /* XXX FBDP: hopefully not for too long... */
3858 if (vm_page_secluded_count > 0) {
3859 lck_mtx_unlock(&vm_page_queue_free_lock);
3860 return TRUE;
3861 }
3862 #endif
3863 if (vm_page_free_wanted_secluded++ == 0) {
3864 need_wakeup = 1;
3865 }
3866 wait_event = (event_t)&vm_page_free_wanted_secluded;
3867 #endif /* CONFIG_SECLUDED_MEMORY */
3868 } else {
3869 if (vm_page_free_wanted++ == 0) {
3870 need_wakeup = 1;
3871 }
3872 wait_event = (event_t)&vm_page_free_count;
3873 }
3874
3875 /*
3876 * We don't do a vm_pageout_scan wakeup if we already have
3877 * some waiters because vm_pageout_scan checks for waiters
3878 * before it returns and does so behind the vm_page_queue_free_lock,
3879 * which we own when we bump the waiter counts.
3880 */
3881
3882 if (vps_dynamic_priority_enabled == TRUE) {
3883 /*
3884 * We are waking up vm_pageout_scan here. If it needs
3885 * the vm_page_queue_free_lock before we unlock it
3886 * we'll end up just blocking and incurring an extra
3887 * context switch. Could be a performance issue.
3888 */
3889
3890 counter(c_vm_page_wait_block++);
3891
3892 if (need_wakeup) {
3893 thread_wakeup((event_t)&vm_page_free_wanted);
3894 }
3895
3896 /*
3897 * LD: This event is going to get recorded every time because
3898 * we don't get back THREAD_WAITING from lck_mtx_sleep_with_inheritor.
3899 * We just block in that routine.
3900 */
3901 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3902 vm_page_free_wanted_privileged,
3903 vm_page_free_wanted,
3904 #if CONFIG_SECLUDED_MEMORY
3905 vm_page_free_wanted_secluded,
3906 #else /* CONFIG_SECLUDED_MEMORY */
3907 0,
3908 #endif /* CONFIG_SECLUDED_MEMORY */
3909 0);
3910 wait_result = lck_mtx_sleep_with_inheritor(&vm_page_queue_free_lock,
3911 LCK_SLEEP_UNLOCK,
3912 wait_event,
3913 vm_pageout_scan_thread,
3914 interruptible,
3915 0);
3916 } else {
3917 wait_result = assert_wait(wait_event, interruptible);
3918
3919 lck_mtx_unlock(&vm_page_queue_free_lock);
3920 counter(c_vm_page_wait_block++);
3921
3922 if (need_wakeup) {
3923 thread_wakeup((event_t)&vm_page_free_wanted);
3924 }
3925
3926 if (wait_result == THREAD_WAITING) {
3927 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3928 vm_page_free_wanted_privileged,
3929 vm_page_free_wanted,
3930 #if CONFIG_SECLUDED_MEMORY
3931 vm_page_free_wanted_secluded,
3932 #else /* CONFIG_SECLUDED_MEMORY */
3933 0,
3934 #endif /* CONFIG_SECLUDED_MEMORY */
3935 0);
3936 wait_result = thread_block(THREAD_CONTINUE_NULL);
3937 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3938 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3939 }
3940 }
3941
3942 return (wait_result == THREAD_AWAKENED) || (wait_result == THREAD_NOT_WAITING);
3943 }
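/*
 * Illustrative sketch (assumption, not compiled): allocation paths
 * typically pair vm_page_grab() with vm_page_wait(), retrying until a
 * page shows up or the wait is interrupted.  The error handling below
 * is hypothetical; real callers map a FALSE return to whatever failure
 * code fits their context.
 *
 *	while ((mem = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(interruptible)) {
 *			return KERN_ABORTED;	<- hypothetical error choice
 *		}
 *	}
 */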
3944
3945 /*
3946 * vm_page_alloc:
3947 *
3948 * Allocate and return a memory cell associated
3949 * with this VM object/offset pair.
3950 *
3951 * Object must be locked.
3952 */
3953
3954 vm_page_t
3955 vm_page_alloc(
3956 vm_object_t object,
3957 vm_object_offset_t offset)
3958 {
3959 vm_page_t mem;
3960 int grab_options;
3961
3962 vm_object_lock_assert_exclusive(object);
3963 grab_options = 0;
3964 #if CONFIG_SECLUDED_MEMORY
3965 if (object->can_grab_secluded) {
3966 grab_options |= VM_PAGE_GRAB_SECLUDED;
3967 }
3968 #endif /* CONFIG_SECLUDED_MEMORY */
3969 mem = vm_page_grab_options(grab_options);
3970 if (mem == VM_PAGE_NULL) {
3971 return VM_PAGE_NULL;
3972 }
3973
3974 vm_page_insert(mem, object, offset);
3975
3976 return mem;
3977 }
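/*
 * Illustrative sketch (not compiled): the object must be locked
 * exclusively across the call and the returned page comes back busy;
 * the caller clears the busy bit once it has finished initializing the
 * page.  "object", "offset" and "m" are hypothetical names, and
 * PAGE_WAKEUP_DONE() is the usual way to clear vmp_busy and wake any
 * waiters.
 *
 *	vm_object_lock(object);
 *	m = vm_page_alloc(object, offset);
 *	if (m != VM_PAGE_NULL) {
 *		... fill in the page ...
 *		PAGE_WAKEUP_DONE(m);
 *	}
 *	vm_object_unlock(object);
 */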
3978
3979 /*
3980 * vm_page_alloc_guard:
3981 *
3982 * Allocate a fictitious page which will be used
3983 * as a guard page. The page will be inserted into
3984 * the object and returned to the caller.
3985 */
3986
3987 vm_page_t
3988 vm_page_alloc_guard(
3989 vm_object_t object,
3990 vm_object_offset_t offset)
3991 {
3992 vm_page_t mem;
3993
3994 vm_object_lock_assert_exclusive(object);
3995 mem = vm_page_grab_guard();
3996 if (mem == VM_PAGE_NULL) {
3997 return VM_PAGE_NULL;
3998 }
3999
4000 vm_page_insert(mem, object, offset);
4001
4002 return mem;
4003 }
4004
4005
4006 counter(unsigned int c_laundry_pages_freed = 0; )
4007
4008 /*
4009 * vm_page_free_prepare:
4010 *
4011 * Removes page from any queue it may be on
4012 * and disassociates it from its VM object.
4013 *
4014 * Object and page queues must be locked prior to entry.
4015 */
4016 static void
4017 vm_page_free_prepare(
4018 vm_page_t mem)
4019 {
4020 vm_page_free_prepare_queues(mem);
4021 vm_page_free_prepare_object(mem, TRUE);
4022 }
4023
4024
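/*
 * vm_page_free_prepare_queues:
 *
 * First half of freeing a page: pull it off whatever paging queue it
 * is on and undo any wired/gobbled accounting, including the purgeable
 * ledger adjustments for owned volatile objects.  The page queues lock
 * must be held, and the page's object, if any, must be locked
 * exclusively.
 */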
4025 void
4026 vm_page_free_prepare_queues(
4027 vm_page_t mem)
4028 {
4029 vm_object_t m_object;
4030
4031 VM_PAGE_CHECK(mem);
4032
4033 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
4034 assert(!mem->vmp_cleaning);
4035 m_object = VM_PAGE_OBJECT(mem);
4036
4037 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4038 if (m_object) {
4039 vm_object_lock_assert_exclusive(m_object);
4040 }
4041 if (mem->vmp_laundry) {
4042 /*
4043 * We may have to free a page while it's being laundered
4044 * if we lost its pager (due to a forced unmount, for example).
4045 * We need to call vm_pageout_steal_laundry() before removing
4046 * the page from its VM object, so that we can remove it
4047 * from its pageout queue and adjust the laundry accounting
4048 */
4049 vm_pageout_steal_laundry(mem, TRUE);
4050 counter(++c_laundry_pages_freed);
4051 }
4052
4053 vm_page_queues_remove(mem, TRUE);
4054
4055 if (VM_PAGE_WIRED(mem)) {
4056 assert(mem->vmp_wire_count > 0);
4057
4058 if (m_object) {
4059 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4060 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4061 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4062
4063 assert(m_object->resident_page_count >=
4064 m_object->wired_page_count);
4065
4066 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4067 OSAddAtomic(+1, &vm_page_purgeable_count);
4068 assert(vm_page_purgeable_wired_count > 0);
4069 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4070 }
4071 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4072 m_object->purgable == VM_PURGABLE_EMPTY) &&
4073 m_object->vo_owner != TASK_NULL) {
4074 task_t owner;
4075 int ledger_idx_volatile;
4076 int ledger_idx_nonvolatile;
4077 int ledger_idx_volatile_compressed;
4078 int ledger_idx_nonvolatile_compressed;
4079 boolean_t do_footprint;
4080
4081 owner = VM_OBJECT_OWNER(m_object);
4082 vm_object_ledger_tag_ledgers(
4083 m_object,
4084 &ledger_idx_volatile,
4085 &ledger_idx_nonvolatile,
4086 &ledger_idx_volatile_compressed,
4087 &ledger_idx_nonvolatile_compressed,
4088 &do_footprint);
4089 /*
4090 * While wired, this page was accounted
4091 * as "non-volatile" but it should now
4092 * be accounted as "volatile".
4093 */
4094 /* one less "non-volatile"... */
4095 ledger_debit(owner->ledger,
4096 ledger_idx_nonvolatile,
4097 PAGE_SIZE);
4098 if (do_footprint) {
4099 /* ... and "phys_footprint" */
4100 ledger_debit(owner->ledger,
4101 task_ledgers.phys_footprint,
4102 PAGE_SIZE);
4103 }
4104 /* one more "volatile" */
4105 ledger_credit(owner->ledger,
4106 ledger_idx_volatile,
4107 PAGE_SIZE);
4108 }
4109 }
4110 if (!mem->vmp_private && !mem->vmp_fictitious) {
4111 vm_page_wire_count--;
4112 }
4113
4114 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4115 mem->vmp_wire_count = 0;
4116 assert(!mem->vmp_gobbled);
4117 } else if (mem->vmp_gobbled) {
4118 if (!mem->vmp_private && !mem->vmp_fictitious) {
4119 vm_page_wire_count--;
4120 }
4121 vm_page_gobble_count--;
4122 }
4123 }
4124
4125
4126 void
4127 vm_page_free_prepare_object(
4128 vm_page_t mem,
4129 boolean_t remove_from_hash)
4130 {
4131 if (mem->vmp_tabled) {
4132 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
4133 }
4134 PAGE_WAKEUP(mem); /* clears wanted */
4135
4136 if (mem->vmp_private) {
4137 mem->vmp_private = FALSE;
4138 mem->vmp_fictitious = TRUE;
4139 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
4140 }
4141 if (!mem->vmp_fictitious) {
4142 assert(mem->vmp_pageq.next == 0);
4143 assert(mem->vmp_pageq.prev == 0);
4144 assert(mem->vmp_listq.next == 0);
4145 assert(mem->vmp_listq.prev == 0);
4146 #if CONFIG_BACKGROUND_QUEUE
4147 assert(mem->vmp_backgroundq.next == 0);
4148 assert(mem->vmp_backgroundq.prev == 0);
4149 #endif /* CONFIG_BACKGROUND_QUEUE */
4150 assert(mem->vmp_next_m == 0);
4151 ASSERT_PMAP_FREE(mem);
4152 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
4153 }
4154 }
4155
4156
4157 /*
4158 * vm_page_free:
4159 *
4160 * Returns the given page to the free list,
4161 * disassociating it from its VM object.
4162 *
4163 * Object and page queues must be locked prior to entry.
4164 */
4165 void
4166 vm_page_free(
4167 vm_page_t mem)
4168 {
4169 vm_page_free_prepare(mem);
4170
4171 if (mem->vmp_fictitious) {
4172 vm_page_release_fictitious(mem);
4173 } else {
4174 vm_page_release(mem,
4175 TRUE); /* page queues are locked */
4176 }
4177 }
4178
4179
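/*
 * vm_page_free_unlocked:
 *
 * Same as vm_page_free(), for callers that do not already hold the
 * page queues lock: the queues lock is taken just long enough to pull
 * the page off its queues, and the page is then released with the
 * queues unlocked.  The page's object, if the page is still tabled in
 * one, must be locked exclusively.
 */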
4180 void
4181 vm_page_free_unlocked(
4182 vm_page_t mem,
4183 boolean_t remove_from_hash)
4184 {
4185 vm_page_lockspin_queues();
4186 vm_page_free_prepare_queues(mem);
4187 vm_page_unlock_queues();
4188
4189 vm_page_free_prepare_object(mem, remove_from_hash);
4190
4191 if (mem->vmp_fictitious) {
4192 vm_page_release_fictitious(mem);
4193 } else {
4194 vm_page_release(mem, FALSE); /* page queues are not locked */
4195 }
4196 }
4197
4198
4199 /*
4200 * Free a list of pages. The list can be up to several hundred pages,
4201 * as blocked up by vm_pageout_scan().
4202 * The big win is not having to take the free list lock once
4203 * per page.
4204 *
4205 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
4206 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
4207 */
4208 void
4209 vm_page_free_list(
4210 vm_page_t freeq,
4211 boolean_t prepare_object)
4212 {
4213 vm_page_t mem;
4214 vm_page_t nxt;
4215 vm_page_t local_freeq;
4216 int pg_count;
4217
4218 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
4219 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
4220
4221 while (freeq) {
4222 pg_count = 0;
4223 local_freeq = VM_PAGE_NULL;
4224 mem = freeq;
4225
4226 /*
4227 * break up the processing into smaller chunks so
4228 * that we can 'pipeline' the pages onto the
4229 * free list w/o introducing too much
4230 * contention on the global free queue lock
4231 */
4232 while (mem && pg_count < 64) {
4233 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
4234 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
4235 #if CONFIG_BACKGROUND_QUEUE
4236 assert(mem->vmp_backgroundq.next == 0 &&
4237 mem->vmp_backgroundq.prev == 0 &&
4238 mem->vmp_on_backgroundq == FALSE);
4239 #endif
4240 nxt = mem->vmp_snext;
4241 mem->vmp_snext = NULL;
4242 assert(mem->vmp_pageq.prev == 0);
4243
4244 if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
4245 ASSERT_PMAP_FREE(mem);
4246 }
4247 if (prepare_object == TRUE) {
4248 vm_page_free_prepare_object(mem, TRUE);
4249 }
4250
4251 if (!mem->vmp_fictitious) {
4252 assert(mem->vmp_busy);
4253
4254 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
4255 vm_lopage_free_count < vm_lopage_free_limit &&
4256 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
4257 vm_page_release(mem, FALSE); /* page queues are not locked */
4258 #if CONFIG_SECLUDED_MEMORY
4259 } else if (vm_page_secluded_count < vm_page_secluded_target &&
4260 num_tasks_can_use_secluded_mem == 0) {
4261 vm_page_release(mem,
4262 FALSE); /* page queues are not locked */
4263 #endif /* CONFIG_SECLUDED_MEMORY */
4264 } else {
4265 /*
4266 * IMPORTANT: we can't set the page "free" here
4267 * because that would make the page eligible for
4268 * a physically-contiguous allocation (see
4269 * vm_page_find_contiguous()) right away (we don't
4270 * hold the vm_page_queue_free lock). That would
4271 * cause trouble because the page is not actually
4272 * in the free queue yet...
4273 */
4274 mem->vmp_snext = local_freeq;
4275 local_freeq = mem;
4276 pg_count++;
4277
4278 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
4279 }
4280 } else {
4281 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
4282 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
4283 vm_page_release_fictitious(mem);
4284 }
4285 mem = nxt;
4286 }
4287 freeq = mem;
4288
4289 if ((mem = local_freeq)) {
4290 unsigned int avail_free_count;
4291 unsigned int need_wakeup = 0;
4292 unsigned int need_priv_wakeup = 0;
4293 #if CONFIG_SECLUDED_MEMORY
4294 unsigned int need_wakeup_secluded = 0;
4295 #endif /* CONFIG_SECLUDED_MEMORY */
4296 event_t priv_wakeup_event, secluded_wakeup_event, normal_wakeup_event;
4297 boolean_t priv_wakeup_all, secluded_wakeup_all, normal_wakeup_all;
4298
4299 lck_mtx_lock_spin(&vm_page_queue_free_lock);
4300
4301 while (mem) {
4302 int color;
4303
4304 nxt = mem->vmp_snext;
4305
4306 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
4307 assert(mem->vmp_busy);
4308 mem->vmp_lopage = FALSE;
4309 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
4310
4311 color = VM_PAGE_GET_COLOR(mem);
4312 #if defined(__x86_64__)
4313 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
4314 #else
4315 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
4316 mem, vmp_pageq);
4317 #endif
4318 mem = nxt;
4319 }
4320 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
4321 vm_page_free_count += pg_count;
4322 avail_free_count = vm_page_free_count;
4323
4324 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
4325
4326 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
4327 if (avail_free_count < vm_page_free_wanted_privileged) {
4328 need_priv_wakeup = avail_free_count;
4329 vm_page_free_wanted_privileged -= avail_free_count;
4330 avail_free_count = 0;
4331 } else {
4332 need_priv_wakeup = vm_page_free_wanted_privileged;
4333 avail_free_count -= vm_page_free_wanted_privileged;
4334 vm_page_free_wanted_privileged = 0;
4335 }
4336 }
4337 #if CONFIG_SECLUDED_MEMORY
4338 if (vm_page_free_wanted_secluded > 0 &&
4339 avail_free_count > vm_page_free_reserved) {
4340 unsigned int available_pages;
4341 available_pages = (avail_free_count -
4342 vm_page_free_reserved);
4343 if (available_pages <
4344 vm_page_free_wanted_secluded) {
4345 need_wakeup_secluded = available_pages;
4346 vm_page_free_wanted_secluded -=
4347 available_pages;
4348 avail_free_count -= available_pages;
4349 } else {
4350 need_wakeup_secluded =
4351 vm_page_free_wanted_secluded;
4352 avail_free_count -=
4353 vm_page_free_wanted_secluded;
4354 vm_page_free_wanted_secluded = 0;
4355 }
4356 }
4357 #endif /* CONFIG_SECLUDED_MEMORY */
4358 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
4359 unsigned int available_pages;
4360
4361 available_pages = avail_free_count - vm_page_free_reserved;
4362
4363 if (available_pages >= vm_page_free_wanted) {
4364 need_wakeup = vm_page_free_wanted;
4365 vm_page_free_wanted = 0;
4366 } else {
4367 need_wakeup = available_pages;
4368 vm_page_free_wanted -= available_pages;
4369 }
4370 }
4371 lck_mtx_unlock(&vm_page_queue_free_lock);
4372
4373 priv_wakeup_event = NULL;
4374 secluded_wakeup_event = NULL;
4375 normal_wakeup_event = NULL;
4376
4377 priv_wakeup_all = FALSE;
4378 secluded_wakeup_all = FALSE;
4379 normal_wakeup_all = FALSE;
4380
4381
4382 if (need_priv_wakeup != 0) {
4383 /*
4384 * There shouldn't be that many VM-privileged threads,
4385 * so let's wake them all up, even if we don't quite
4386 * have enough pages to satisfy them all.
4387 */
4388 priv_wakeup_event = (event_t)&vm_page_free_wanted_privileged;
4389 priv_wakeup_all = TRUE;
4390 }
4391 #if CONFIG_SECLUDED_MEMORY
4392 if (need_wakeup_secluded != 0 &&
4393 vm_page_free_wanted_secluded == 0) {
4394 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
4395 secluded_wakeup_all = TRUE;
4396 need_wakeup_secluded = 0;
4397 } else {
4398 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
4399 }
4400 #endif /* CONFIG_SECLUDED_MEMORY */
4401 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
4402 /*
4403 * We don't expect to have any more waiters
4404 * after this, so let's wake them all up at
4405 * once.
4406 */
4407 normal_wakeup_event = (event_t) &vm_page_free_count;
4408 normal_wakeup_all = TRUE;
4409 need_wakeup = 0;
4410 } else {
4411 normal_wakeup_event = (event_t) &vm_page_free_count;
4412 }
4413
4414 if (priv_wakeup_event ||
4415 #if CONFIG_SECLUDED_MEMORY
4416 secluded_wakeup_event ||
4417 #endif /* CONFIG_SECLUDED_MEMORY */
4418 normal_wakeup_event) {
4419 if (vps_dynamic_priority_enabled == TRUE) {
4420 thread_t thread_woken = NULL;
4421
4422 if (priv_wakeup_all == TRUE) {
4423 wakeup_all_with_inheritor(priv_wakeup_event, THREAD_AWAKENED);
4424 }
4425
4426 #if CONFIG_SECLUDED_MEMORY
4427 if (secluded_wakeup_all == TRUE) {
4428 wakeup_all_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED);
4429 }
4430
4431 while (need_wakeup_secluded-- != 0) {
4432 /*
4433 * Wake up one waiter per page we just released.
4434 */
4435 wakeup_one_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
4436 thread_deallocate(thread_woken);
4437 }
4438 #endif /* CONFIG_SECLUDED_MEMORY */
4439
4440 if (normal_wakeup_all == TRUE) {
4441 wakeup_all_with_inheritor(normal_wakeup_event, THREAD_AWAKENED);
4442 }
4443
4444 while (need_wakeup-- != 0) {
4445 /*
4446 * Wake up one waiter per page we just released.
4447 */
4448 wakeup_one_with_inheritor(normal_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
4449 thread_deallocate(thread_woken);
4450 }
4451 } else {
4452 /*
4453 * Non-priority-aware wakeups.
4454 */
4455
4456 if (priv_wakeup_all == TRUE) {
4457 thread_wakeup(priv_wakeup_event);
4458 }
4459
4460 #if CONFIG_SECLUDED_MEMORY
4461 if (secluded_wakeup_all == TRUE) {
4462 thread_wakeup(secluded_wakeup_event);
4463 }
4464
4465 while (need_wakeup_secluded-- != 0) {
4466 /*
4467 * Wake up one waiter per page we just released.
4468 */
4469 thread_wakeup_one(secluded_wakeup_event);
4470 }
4471
4472 #endif /* CONFIG_SECLUDED_MEMORY */
4473 if (normal_wakeup_all == TRUE) {
4474 thread_wakeup(normal_wakeup_event);
4475 }
4476
4477 while (need_wakeup-- != 0) {
4478 /*
4479 * Wake up one waiter per page we just released.
4480 */
4481 thread_wakeup_one(normal_wakeup_event);
4482 }
4483 }
4484 }
4485
4486 VM_CHECK_MEMORYSTATUS;
4487 }
4488 }
4489 }
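/*
 * Illustrative sketch (not compiled): callers batch pages onto a local
 * singly-linked list through vmp_snext and hand the whole chain to
 * vm_page_free_list(), mirroring what vm_page_secluded_drain() does
 * above with its local_freeq/local_freed locals.
 *
 *	mem->vmp_snext = local_freeq;
 *	local_freeq = mem;
 *	local_freed++;
 *	...
 *	if (local_freed) {
 *		vm_page_free_list(local_freeq, TRUE);
 *	}
 */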
4490
4491
4492 /*
4493 * vm_page_wire:
4494 *
4495 * Mark this page as wired down by yet
4496 * another map, removing it from paging queues
4497 * as necessary.
4498 *
4499 * The page's object and the page queues must be locked.
4500 */
4501
4502
4503 void
4504 vm_page_wire(
4505 vm_page_t mem,
4506 vm_tag_t tag,
4507 boolean_t check_memorystatus)
4508 {
4509 vm_object_t m_object;
4510
4511 m_object = VM_PAGE_OBJECT(mem);
4512
4513 // dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
4514
4515 VM_PAGE_CHECK(mem);
4516 if (m_object) {
4517 vm_object_lock_assert_exclusive(m_object);
4518 } else {
4519 /*
4520 * In theory, the page should be in an object before it
4521 * gets wired, since we need to hold the object lock
4522 * to update some fields in the page structure.
4523 * However, some code (i386 pmap, for example) might want
4524 * to wire a page before it gets inserted into an object.
4525 * That's somewhat OK, as long as nobody else can get to
4526 * that page and update it at the same time.
4527 */
4528 }
4529 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4530 if (!VM_PAGE_WIRED(mem)) {
4531 if (mem->vmp_laundry) {
4532 vm_pageout_steal_laundry(mem, TRUE);
4533 }
4534
4535 vm_page_queues_remove(mem, TRUE);
4536
4537 assert(mem->vmp_wire_count == 0);
4538 mem->vmp_q_state = VM_PAGE_IS_WIRED;
4539
4540 if (m_object) {
4541 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4542 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4543 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
4544
4545 assert(m_object->resident_page_count >=
4546 m_object->wired_page_count);
4547 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4548 assert(vm_page_purgeable_count > 0);
4549 OSAddAtomic(-1, &vm_page_purgeable_count);
4550 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4551 }
4552 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4553 m_object->purgable == VM_PURGABLE_EMPTY) &&
4554 m_object->vo_owner != TASK_NULL) {
4555 task_t owner;
4556 int ledger_idx_volatile;
4557 int ledger_idx_nonvolatile;
4558 int ledger_idx_volatile_compressed;
4559 int ledger_idx_nonvolatile_compressed;
4560 boolean_t do_footprint;
4561
4562 owner = VM_OBJECT_OWNER(m_object);
4563 vm_object_ledger_tag_ledgers(
4564 m_object,
4565 &ledger_idx_volatile,
4566 &ledger_idx_nonvolatile,
4567 &ledger_idx_volatile_compressed,
4568 &ledger_idx_nonvolatile_compressed,
4569 &do_footprint);
4570 /* less volatile bytes */
4571 ledger_debit(owner->ledger,
4572 ledger_idx_volatile,
4573 PAGE_SIZE);
4574 /* more not-quite-volatile bytes */
4575 ledger_credit(owner->ledger,
4576 ledger_idx_nonvolatile,
4577 PAGE_SIZE);
4578 if (do_footprint) {
4579 /* more footprint */
4580 ledger_credit(owner->ledger,
4581 task_ledgers.phys_footprint,
4582 PAGE_SIZE);
4583 }
4584 }
4585 if (m_object->all_reusable) {
4586 /*
4587 * Wired pages are not counted as "re-usable"
4588 * in "all_reusable" VM objects, so nothing
4589 * to do here.
4590 */
4591 } else if (mem->vmp_reusable) {
4592 /*
4593 * This page is not "re-usable" when it's
4594 * wired, so adjust its state and the
4595 * accounting.
4596 */
4597 vm_object_reuse_pages(m_object,
4598 mem->vmp_offset,
4599 mem->vmp_offset + PAGE_SIZE_64,
4600 FALSE);
4601 }
4602 }
4603 assert(!mem->vmp_reusable);
4604
4605 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled) {
4606 vm_page_wire_count++;
4607 }
4608 if (mem->vmp_gobbled) {
4609 vm_page_gobble_count--;
4610 }
4611 mem->vmp_gobbled = FALSE;
4612
4613 if (check_memorystatus == TRUE) {
4614 VM_CHECK_MEMORYSTATUS;
4615 }
4616 }
4617 assert(!mem->vmp_gobbled);
4618 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4619 mem->vmp_wire_count++;
4620 if (__improbable(mem->vmp_wire_count == 0)) {
4621 panic("vm_page_wire(%p): wire_count overflow", mem);
4622 }
4623 VM_PAGE_CHECK(mem);
4624 }
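/*
 * Illustrative sketch (not compiled): wiring takes both the object
 * lock and the page queues lock; vm_page_unwire() below reverses the
 * operation under the same locks.  "object" and "m" are hypothetical
 * names, and the VM_KERN_MEMORY_NONE tag is just an example -- real
 * callers pass the tag that matches their allocation site.
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m, VM_KERN_MEMORY_NONE, TRUE);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */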
4625
4626 /*
4627 * vm_page_unwire:
4628 *
4629 * Release one wiring of this page, potentially
4630 * enabling it to be paged again.
4631 *
4632 * The page's object and the page queues must be locked.
4633 */
4634 void
4635 vm_page_unwire(
4636 vm_page_t mem,
4637 boolean_t queueit)
4638 {
4639 vm_object_t m_object;
4640
4641 m_object = VM_PAGE_OBJECT(mem);
4642
4643 // dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
4644
4645 VM_PAGE_CHECK(mem);
4646 assert(VM_PAGE_WIRED(mem));
4647 assert(mem->vmp_wire_count > 0);
4648 assert(!mem->vmp_gobbled);
4649 assert(m_object != VM_OBJECT_NULL);
4650 vm_object_lock_assert_exclusive(m_object);
4651 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4652 if (--mem->vmp_wire_count == 0) {
4653 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4654
4655 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4656 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4657 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4658 if (!mem->vmp_private && !mem->vmp_fictitious) {
4659 vm_page_wire_count--;
4660 }
4661
4662 assert(m_object->resident_page_count >=
4663 m_object->wired_page_count);
4664 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4665 OSAddAtomic(+1, &vm_page_purgeable_count);
4666 assert(vm_page_purgeable_wired_count > 0);
4667 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4668 }
4669 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4670 m_object->purgable == VM_PURGABLE_EMPTY) &&
4671 m_object->vo_owner != TASK_NULL) {
4672 task_t owner;
4673 int ledger_idx_volatile;
4674 int ledger_idx_nonvolatile;
4675 int ledger_idx_volatile_compressed;
4676 int ledger_idx_nonvolatile_compressed;
4677 boolean_t do_footprint;
4678
4679 owner = VM_OBJECT_OWNER(m_object);
4680 vm_object_ledger_tag_ledgers(
4681 m_object,
4682 &ledger_idx_volatile,
4683 &ledger_idx_nonvolatile,
4684 &ledger_idx_volatile_compressed,
4685 &ledger_idx_nonvolatile_compressed,
4686 &do_footprint);
4687 /* more volatile bytes */
4688 ledger_credit(owner->ledger,
4689 ledger_idx_volatile,
4690 PAGE_SIZE);
4691 /* less not-quite-volatile bytes */
4692 ledger_debit(owner->ledger,
4693 ledger_idx_nonvolatile,
4694 PAGE_SIZE);
4695 if (do_footprint) {
4696 /* less footprint */
4697 ledger_debit(owner->ledger,
4698 task_ledgers.phys_footprint,
4699 PAGE_SIZE);
4700 }
4701 }
4702 assert(m_object != kernel_object);
4703 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
4704
4705 if (queueit == TRUE) {
4706 if (m_object->purgable == VM_PURGABLE_EMPTY) {
4707 vm_page_deactivate(mem);
4708 } else {
4709 vm_page_activate(mem);
4710 }
4711 }
4712
4713 VM_CHECK_MEMORYSTATUS;
4714 }
4715 VM_PAGE_CHECK(mem);
4716 }
4717
4718 /*
4719 * vm_page_deactivate:
4720 *
4721 * Returns the given page to the inactive list,
4722 * indicating that no physical maps have access
4723 * to this page. [Used by the physical mapping system.]
4724 *
4725 * The page queues must be locked.
4726 */
4727 void
4728 vm_page_deactivate(
4729 vm_page_t m)
4730 {
4731 vm_page_deactivate_internal(m, TRUE);
4732 }
4733
4734
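/*
 * vm_page_deactivate_internal:
 *
 * Worker for vm_page_deactivate().  When clear_hw_reference is TRUE
 * (and the page is not absent), the pmap reference bit is cleared as
 * well, so the page starts its trip through the inactive queue looking
 * unreferenced.
 */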
4735 void
4736 vm_page_deactivate_internal(
4737 vm_page_t m,
4738 boolean_t clear_hw_reference)
4739 {
4740 vm_object_t m_object;
4741
4742 m_object = VM_PAGE_OBJECT(m);
4743
4744 VM_PAGE_CHECK(m);
4745 assert(m_object != kernel_object);
4746 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4747
4748 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4749 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4750 /*
4751 * This page is no longer very interesting. If it was
4752 * interesting (active or inactive/referenced), then we
4753 * clear the reference bit and (re)enter it in the
4754 * inactive queue. Note wired pages should not have
4755 * their reference bit cleared.
4756 */
4757 assert( !(m->vmp_absent && !m->vmp_unusual));
4758
4759 if (m->vmp_gobbled) { /* can this happen? */
4760 assert( !VM_PAGE_WIRED(m));
4761
4762 if (!m->vmp_private && !m->vmp_fictitious) {
4763 vm_page_wire_count--;
4764 }
4765 vm_page_gobble_count--;
4766 m->vmp_gobbled = FALSE;
4767 }
4768 /*
4769 * if this page is currently on the pageout queue, we can't do the
4770 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4771 * and we can't remove it manually since we would need the object lock
4772 * (which is not required here) to decrement the activity_in_progress
4773 * reference which is held on the object while the page is in the pageout queue...
4774 * just let the normal laundry processing proceed
4775 */
4776 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4777 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4778 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4779 VM_PAGE_WIRED(m)) {
4780 return;
4781 }
4782 if (!m->vmp_absent && clear_hw_reference == TRUE) {
4783 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4784 }
4785
4786 m->vmp_reference = FALSE;
4787 m->vmp_no_cache = FALSE;
4788
4789 if (!VM_PAGE_INACTIVE(m)) {
4790 vm_page_queues_remove(m, FALSE);
4791
4792 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4793 m->vmp_dirty && m_object->internal &&
4794 (m_object->purgable == VM_PURGABLE_DENY ||
4795 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4796 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4797 vm_page_check_pageable_safe(m);
4798 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
4799 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4800 vm_page_throttled_count++;
4801 } else {
4802 if (m_object->named && m_object->ref_count == 1) {
4803 vm_page_speculate(m, FALSE);
4804 #if DEVELOPMENT || DEBUG
4805 vm_page_speculative_recreated++;
4806 #endif
4807 } else {
4808 vm_page_enqueue_inactive(m, FALSE);
4809 }
4810 }
4811 }
4812 }
4813
4814 /*
4815 * vm_page_enqueue_cleaned
4816 *
4817 * Put the page on the cleaned queue, mark it cleaned, etc.
4818 * Being on the cleaned queue (i.e. having vmp_q_state set to VM_PAGE_ON_INACTIVE_CLEANED_Q)
4819 * does ** NOT ** guarantee that the page is clean!
4820 *
4821 * Call with the queues lock held.
4822 */
4823
4824 void
4825 vm_page_enqueue_cleaned(vm_page_t m)
4826 {
4827 vm_object_t m_object;
4828
4829 m_object = VM_PAGE_OBJECT(m);
4830
4831 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4832 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4833 assert( !(m->vmp_absent && !m->vmp_unusual));
4834
4835 if (VM_PAGE_WIRED(m)) {
4836 return;
4837 }
4838
4839 if (m->vmp_gobbled) {
4840 if (!m->vmp_private && !m->vmp_fictitious) {
4841 vm_page_wire_count--;
4842 }
4843 vm_page_gobble_count--;
4844 m->vmp_gobbled = FALSE;
4845 }
4846 /*
4847 * if this page is currently on the pageout queue, we can't do the
4848 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4849 * and we can't remove it manually since we would need the object lock
4850 * (which is not required here) to decrement the activity_in_progress
4851 * reference which is held on the object while the page is in the pageout queue...
4852 * just let the normal laundry processing proceed
4853 */
4854 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4855 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4856 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4857 return;
4858 }
4859 vm_page_queues_remove(m, FALSE);
4860
4861 vm_page_check_pageable_safe(m);
4862 vm_page_queue_enter(&vm_page_queue_cleaned, m, vmp_pageq);
4863 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4864 vm_page_cleaned_count++;
4865
4866 vm_page_inactive_count++;
4867 if (m_object->internal) {
4868 vm_page_pageable_internal_count++;
4869 } else {
4870 vm_page_pageable_external_count++;
4871 }
4872 #if CONFIG_BACKGROUND_QUEUE
4873 if (m->vmp_in_background) {
4874 vm_page_add_to_backgroundq(m, TRUE);
4875 }
4876 #endif
4877 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
4878 }
4879
4880 /*
4881 * vm_page_activate:
4882 *
4883 * Put the specified page on the active list (if appropriate).
4884 *
4885 * The page queues must be locked.
4886 */
4887
4888 void
4889 vm_page_activate(
4890 vm_page_t m)
4891 {
4892 vm_object_t m_object;
4893
4894 m_object = VM_PAGE_OBJECT(m);
4895
4896 VM_PAGE_CHECK(m);
4897 #ifdef FIXME_4778297
4898 assert(m_object != kernel_object);
4899 #endif
4900 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4901 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4902 assert( !(m->vmp_absent && !m->vmp_unusual));
4903
4904 if (m->vmp_gobbled) {
4905 assert( !VM_PAGE_WIRED(m));
4906 if (!m->vmp_private && !m->vmp_fictitious) {
4907 vm_page_wire_count--;
4908 }
4909 vm_page_gobble_count--;
4910 m->vmp_gobbled = FALSE;
4911 }
4912 /*
4913 * if this page is currently on the pageout queue, we can't do the
4914 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4915 * and we can't remove it manually since we would need the object lock
4916 * (which is not required here) to decrement the activity_in_progress
4917 * reference which is held on the object while the page is in the pageout queue...
4918 * just let the normal laundry processing proceed
4919 */
4920 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4921 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4922 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4923 return;
4924 }
4925
4926 #if DEBUG
4927 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q) {
4928 panic("vm_page_activate: already active");
4929 }
4930 #endif
4931
4932 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4933 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4934 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4935 }
4936
4937 vm_page_queues_remove(m, FALSE);
4938
4939 if (!VM_PAGE_WIRED(m)) {
4940 vm_page_check_pageable_safe(m);
4941 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4942 m->vmp_dirty && m_object->internal &&
4943 (m_object->purgable == VM_PURGABLE_DENY ||
4944 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4945 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4946 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
4947 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4948 vm_page_throttled_count++;
4949 } else {
4950 #if CONFIG_SECLUDED_MEMORY
4951 if (secluded_for_filecache &&
4952 vm_page_secluded_target != 0 &&
4953 num_tasks_can_use_secluded_mem == 0 &&
4954 m_object->eligible_for_secluded) {
4955 vm_page_queue_enter(&vm_page_queue_secluded, m, vmp_pageq);
4956 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
4957 vm_page_secluded_count++;
4958 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
4959 vm_page_secluded_count_inuse++;
4960 assert(!m_object->internal);
4961 // vm_page_pageable_external_count++;
4962 } else
4963 #endif /* CONFIG_SECLUDED_MEMORY */
4964 vm_page_enqueue_active(m, FALSE);
4965 }
4966 m->vmp_reference = TRUE;
4967 m->vmp_no_cache = FALSE;
4968 }
4969 VM_PAGE_CHECK(m);
4970 }
4971
4972
4973 /*
4974 * vm_page_speculate:
4975 *
4976 * Put the specified page on the speculative list (if appropriate).
4977 *
4978 * The page queues must be locked.
4979 */
4980 void
4981 vm_page_speculate(
4982 vm_page_t m,
4983 boolean_t new)
4984 {
4985 struct vm_speculative_age_q *aq;
4986 vm_object_t m_object;
4987
4988 m_object = VM_PAGE_OBJECT(m);
4989
4990 VM_PAGE_CHECK(m);
4991 vm_page_check_pageable_safe(m);
4992
4993 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4994 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4995 assert( !(m->vmp_absent && !m->vmp_unusual));
4996 assert(m_object->internal == FALSE);
4997
4998 /*
4999 * if this page is currently on the pageout queue, we can't do the
5000 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5001 * and we can't remove it manually since we would need the object lock
5002 * (which is not required here) to decrement the activity_in_progress
5003 * reference which is held on the object while the page is in the pageout queue...
5004 * just let the normal laundry processing proceed
5005 */
5006 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
5007 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5008 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5009 return;
5010 }
5011
5012 vm_page_queues_remove(m, FALSE);
5013
5014 if (!VM_PAGE_WIRED(m)) {
5015 mach_timespec_t ts;
5016 clock_sec_t sec;
5017 clock_nsec_t nsec;
5018
5019 clock_get_system_nanotime(&sec, &nsec);
5020 ts.tv_sec = (unsigned int) sec;
5021 ts.tv_nsec = nsec;
5022
5023 if (vm_page_speculative_count == 0) {
5024 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5025 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5026
5027 aq = &vm_page_queue_speculative[speculative_age_index];
5028
5029 /*
5030 * set the timer to begin a new group
5031 */
5032 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
5033 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
5034
5035 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
5036 } else {
5037 aq = &vm_page_queue_speculative[speculative_age_index];
5038
5039 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
5040 speculative_age_index++;
5041
5042 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5043 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5044 }
5045 if (speculative_age_index == speculative_steal_index) {
5046 speculative_steal_index = speculative_age_index + 1;
5047
5048 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5049 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5050 }
5051 }
5052 aq = &vm_page_queue_speculative[speculative_age_index];
5053
5054 if (!vm_page_queue_empty(&aq->age_q)) {
5055 vm_page_speculate_ageit(aq);
5056 }
5057
5058 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
5059 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
5060
5061 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
5062 }
5063 }
5064 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
5065 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
5066 vm_page_speculative_count++;
5067 vm_page_pageable_external_count++;
5068
5069 if (new == TRUE) {
5070 vm_object_lock_assert_exclusive(m_object);
5071
5072 m_object->pages_created++;
5073 #if DEVELOPMENT || DEBUG
5074 vm_page_speculative_created++;
5075 #endif
5076 }
5077 }
5078 VM_PAGE_CHECK(m);
5079 }
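/*
 * Worked example (values hypothetical): with
 * vm_page_speculative_q_age_ms set to 1500, a freshly started age bin
 * gets age_ts = now + 1 second + 500 * 1000 * NSEC_PER_USEC
 * nanoseconds, i.e. now + 1.5s.  Pages speculated after that deadline
 * advance speculative_age_index to the next bin (wrapping at
 * VM_PAGE_MAX_SPECULATIVE_AGE_Q), and any pages still sitting in the
 * reused bin are moved to the "aged" bin via vm_page_speculate_ageit().
 */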
5080
5081
5082 /*
5083 * move pages from the specified aging bin to
5084 * the speculative bin that pageout_scan claims from
5085 *
5086 * The page queues must be locked.
5087 */
5088 void
5089 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
5090 {
5091 struct vm_speculative_age_q *sq;
5092 vm_page_t t;
5093
5094 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
5095
5096 if (vm_page_queue_empty(&sq->age_q)) {
5097 sq->age_q.next = aq->age_q.next;
5098 sq->age_q.prev = aq->age_q.prev;
5099
5100 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
5101 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
5102
5103 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
5104 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
5105 } else {
5106 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
5107 t->vmp_pageq.next = aq->age_q.next;
5108
5109 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
5110 t->vmp_pageq.prev = sq->age_q.prev;
5111
5112 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
5113 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
5114
5115 sq->age_q.prev = aq->age_q.prev;
5116 }
5117 vm_page_queue_init(&aq->age_q);
5118 }
5119
5120
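/*
 * vm_page_lru:
 *
 * Refresh the page's LRU position: requeue it at the tail of the
 * inactive queue so it becomes the last inactive page to be considered
 * for reclaim.  Pages that are wired, laundered, private or on the
 * pageout queue are left alone.  The page queues lock must be held.
 */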
5121 void
5122 vm_page_lru(
5123 vm_page_t m)
5124 {
5125 VM_PAGE_CHECK(m);
5126 assert(VM_PAGE_OBJECT(m) != kernel_object);
5127 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
5128
5129 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5130
5131 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
5132 /*
5133 * we don't need to do all the other work that
5134 * vm_page_queues_remove and vm_page_enqueue_inactive
5135 * bring along for the ride
5136 */
5137 assert(!m->vmp_laundry);
5138 assert(!m->vmp_private);
5139
5140 m->vmp_no_cache = FALSE;
5141
5142 vm_page_queue_remove(&vm_page_queue_inactive, m, vmp_pageq);
5143 vm_page_queue_enter(&vm_page_queue_inactive, m, vmp_pageq);
5144
5145 return;
5146 }
5147 /*
5148 * if this page is currently on the pageout queue, we can't do the
5149 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5150 * and we can't remove it manually since we would need the object lock
5151 * (which is not required here) to decrement the activity_in_progress
5152 * reference which is held on the object while the page is in the pageout queue...
5153 * just let the normal laundry processing proceed
5154 */
5155 if (m->vmp_laundry || m->vmp_private ||
5156 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5157 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
5158 VM_PAGE_WIRED(m)) {
5159 return;
5160 }
5161
5162 m->vmp_no_cache = FALSE;
5163
5164 vm_page_queues_remove(m, FALSE);
5165
5166 vm_page_enqueue_inactive(m, FALSE);
5167 }
5168
5169
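/*
 * vm_page_reactivate_all_throttled:
 *
 * Move every page on the throttled queue back onto the head of the
 * active queue in one splice, fixing up the global page counts.  Only
 * meaningful when dynamic paging is enabled; otherwise this is a no-op.
 */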
5170 void
5171 vm_page_reactivate_all_throttled(void)
5172 {
5173 vm_page_t first_throttled, last_throttled;
5174 vm_page_t first_active;
5175 vm_page_t m;
5176 int extra_active_count;
5177 int extra_internal_count, extra_external_count;
5178 vm_object_t m_object;
5179
5180 if (!VM_DYNAMIC_PAGING_ENABLED()) {
5181 return;
5182 }
5183
5184 extra_active_count = 0;
5185 extra_internal_count = 0;
5186 extra_external_count = 0;
5187 vm_page_lock_queues();
5188 if (!vm_page_queue_empty(&vm_page_queue_throttled)) {
5189 /*
5190 * Switch "throttled" pages to "active".
5191 */
5192 vm_page_queue_iterate(&vm_page_queue_throttled, m, vmp_pageq) {
5193 VM_PAGE_CHECK(m);
5194 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
5195
5196 m_object = VM_PAGE_OBJECT(m);
5197
5198 extra_active_count++;
5199 if (m_object->internal) {
5200 extra_internal_count++;
5201 } else {
5202 extra_external_count++;
5203 }
5204
5205 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
5206 VM_PAGE_CHECK(m);
5207 #if CONFIG_BACKGROUND_QUEUE
5208 if (m->vmp_in_background) {
5209 vm_page_add_to_backgroundq(m, FALSE);
5210 }
5211 #endif
5212 }
5213
5214 /*
5215 * Transfer the entire throttled queue to the regular LRU page queues.
5216 * We insert it at the head of the active queue, so that these pages
5217 * get re-evaluated by the LRU algorithm first, since they've been
5218 * completely out of it until now.
5219 */
5220 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
5221 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
5222 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
5223 if (vm_page_queue_empty(&vm_page_queue_active)) {
5224 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
5225 } else {
5226 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
5227 }
5228 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
5229 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5230 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
5231
5232 #if DEBUG
5233 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
5234 #endif
5235 vm_page_queue_init(&vm_page_queue_throttled);
5236 /*
5237 * Adjust the global page counts.
5238 */
5239 vm_page_active_count += extra_active_count;
5240 vm_page_pageable_internal_count += extra_internal_count;
5241 vm_page_pageable_external_count += extra_external_count;
5242 vm_page_throttled_count = 0;
5243 }
5244 assert(vm_page_throttled_count == 0);
5245 assert(vm_page_queue_empty(&vm_page_queue_throttled));
5246 vm_page_unlock_queues();
5247 }
5248
5249
5250 /*
5251 * move pages from the indicated local queue to the global active queue
5252 * it's OK to fail if we're below the hard limit and force == FALSE;
5253 * the nolocks == TRUE case is to allow this function to be run on
5254 * the hibernate path.
5255 */
5256
5257 void
5258 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
5259 {
5260 struct vpl *lq;
5261 vm_page_t first_local, last_local;
5262 vm_page_t first_active;
5263 vm_page_t m;
5264 uint32_t count = 0;
5265
5266 if (vm_page_local_q == NULL) {
5267 return;
5268 }
5269
5270 lq = &vm_page_local_q[lid].vpl_un.vpl;
5271
5272 if (nolocks == FALSE) {
5273 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
5274 if (!vm_page_trylockspin_queues()) {
5275 return;
5276 }
5277 } else {
5278 vm_page_lockspin_queues();
5279 }
5280
5281 VPL_LOCK(&lq->vpl_lock);
5282 }
5283 if (lq->vpl_count) {
5284 /*
5285 * Switch "local" pages to "active".
5286 */
5287 assert(!vm_page_queue_empty(&lq->vpl_queue));
5288
5289 vm_page_queue_iterate(&lq->vpl_queue, m, vmp_pageq) {
5290 VM_PAGE_CHECK(m);
5291 vm_page_check_pageable_safe(m);
5292 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
5293 assert(!m->vmp_fictitious);
5294
5295 if (m->vmp_local_id != lid) {
5296 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
5297 }
5298
5299 m->vmp_local_id = 0;
5300 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
5301 VM_PAGE_CHECK(m);
5302 #if CONFIG_BACKGROUND_QUEUE
5303 if (m->vmp_in_background) {
5304 vm_page_add_to_backgroundq(m, FALSE);
5305 }
5306 #endif
5307 count++;
5308 }
5309 if (count != lq->vpl_count) {
5310 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
5311 }
5312
5313 /*
5314 * Transfer the entire local queue to the regular LRU page queues.
5315 */
5316 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
5317 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
5318 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
5319
5320 if (vm_page_queue_empty(&vm_page_queue_active)) {
5321 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
5322 } else {
5323 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
5324 }
5325 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
5326 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5327 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
5328
5329 vm_page_queue_init(&lq->vpl_queue);
5330 /*
5331 * Adjust the global page counts.
5332 */
5333 vm_page_active_count += lq->vpl_count;
5334 vm_page_pageable_internal_count += lq->vpl_internal_count;
5335 vm_page_pageable_external_count += lq->vpl_external_count;
5336 lq->vpl_count = 0;
5337 lq->vpl_internal_count = 0;
5338 lq->vpl_external_count = 0;
5339 }
5340 assert(vm_page_queue_empty(&lq->vpl_queue));
5341
5342 if (nolocks == FALSE) {
5343 VPL_UNLOCK(&lq->vpl_lock);
5344
5345 vm_page_balance_inactive(count / 4);
5346 vm_page_unlock_queues();
5347 }
5348 }
5349
5350 /*
5351 * vm_page_part_zero_fill:
5352 *
5353 * Zero-fill a part of the page.
5354 */
5355 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
5356 void
5357 vm_page_part_zero_fill(
5358 vm_page_t m,
5359 vm_offset_t m_pa,
5360 vm_size_t len)
5361 {
5362 #if 0
5363 /*
5364 * we don't hold the page queue lock
5365 * so this check isn't safe to make
5366 */
5367 VM_PAGE_CHECK(m);
5368 #endif
5369
5370 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
5371 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
5372 #else
5373 vm_page_t tmp;
5374 while (1) {
5375 tmp = vm_page_grab();
5376 if (tmp == VM_PAGE_NULL) {
5377 vm_page_wait(THREAD_UNINT);
5378 continue;
5379 }
5380 break;
5381 }
5382 vm_page_zero_fill(tmp);
5383 if (m_pa != 0) {
5384 vm_page_part_copy(m, 0, tmp, 0, m_pa);
5385 }
5386 if ((m_pa + len) < PAGE_SIZE) {
5387 vm_page_part_copy(m, m_pa + len, tmp,
5388 m_pa + len, PAGE_SIZE - (m_pa + len));
5389 }
5390 vm_page_copy(tmp, m);
5391 VM_PAGE_FREE(tmp);
5392 #endif
5393 }
5394
5395 /*
5396 * vm_page_zero_fill:
5397 *
5398 * Zero-fill the specified page.
5399 */
5400 void
5401 vm_page_zero_fill(
5402 vm_page_t m)
5403 {
5404 #if 0
5405 /*
5406 * we don't hold the page queue lock
5407 * so this check isn't safe to make
5408 */
5409 VM_PAGE_CHECK(m);
5410 #endif
5411
5412 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
5413 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
5414 }
5415
5416 /*
5417 * vm_page_part_copy:
5418 *
5419 * copy part of one page to another
5420 */
5421
5422 void
5423 vm_page_part_copy(
5424 vm_page_t src_m,
5425 vm_offset_t src_pa,
5426 vm_page_t dst_m,
5427 vm_offset_t dst_pa,
5428 vm_size_t len)
5429 {
5430 #if 0
5431 /*
5432 * we don't hold the page queue lock
5433 * so this check isn't safe to make
5434 */
5435 VM_PAGE_CHECK(src_m);
5436 VM_PAGE_CHECK(dst_m);
5437 #endif
5438 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
5439 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
5440 }
5441
5442 /*
5443 * vm_page_copy:
5444 *
5445 * Copy one page to another
5446 */
5447
5448 int vm_page_copy_cs_validations = 0;
5449 int vm_page_copy_cs_tainted = 0;
5450
5451 void
5452 vm_page_copy(
5453 vm_page_t src_m,
5454 vm_page_t dest_m)
5455 {
5456 vm_object_t src_m_object;
5457
5458 src_m_object = VM_PAGE_OBJECT(src_m);
5459
5460 #if 0
5461 /*
5462 * we don't hold the page queue lock
5463 * so this check isn't safe to make
5464 */
5465 VM_PAGE_CHECK(src_m);
5466 VM_PAGE_CHECK(dest_m);
5467 #endif
5468 vm_object_lock_assert_held(src_m_object);
5469
5470 if (src_m_object != VM_OBJECT_NULL &&
5471 src_m_object->code_signed) {
5472 /*
5473 * We're copying a page from a code-signed object.
5474 * Whoever ends up mapping the copy page might care about
5475 * the original page's integrity, so let's validate the
5476 * source page now.
5477 */
5478 vm_page_copy_cs_validations++;
5479 vm_page_validate_cs(src_m);
5480 #if DEVELOPMENT || DEBUG
5481 DTRACE_VM4(codesigned_copy,
5482 vm_object_t, src_m_object,
5483 vm_object_offset_t, src_m->vmp_offset,
5484 int, src_m->vmp_cs_validated,
5485 int, src_m->vmp_cs_tainted);
5486 #endif /* DEVELOPMENT || DEBUG */
5487 }
5488
5489 /*
5490 * Propagate the cs_tainted bit to the copy page. Do not propagate
5491 * the cs_validated bit.
5492 */
5493 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
5494 if (dest_m->vmp_cs_tainted) {
5495 vm_page_copy_cs_tainted++;
5496 }
5497 dest_m->vmp_error = src_m->vmp_error; /* sliding src_m might have failed... */
5498 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
5499 }
5500
5501 #if MACH_ASSERT
5502 static void
5503 _vm_page_print(
5504 vm_page_t p)
5505 {
5506 printf("vm_page %p: \n", p);
5507 printf(" pageq: next=%p prev=%p\n",
5508 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
5509 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
5510 printf(" listq: next=%p prev=%p\n",
5511 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
5512 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
5513 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
5514 printf(" object=%p offset=0x%llx\n", VM_PAGE_OBJECT(p), p->vmp_offset);
5515 printf(" wire_count=%u\n", p->vmp_wire_count);
5516 printf(" q_state=%u\n", p->vmp_q_state);
5517
5518 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
5519 (p->vmp_laundry ? "" : "!"),
5520 (p->vmp_reference ? "" : "!"),
5521 (p->vmp_gobbled ? "" : "!"),
5522 (p->vmp_private ? "" : "!"));
5523 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
5524 (p->vmp_busy ? "" : "!"),
5525 (p->vmp_wanted ? "" : "!"),
5526 (p->vmp_tabled ? "" : "!"),
5527 (p->vmp_fictitious ? "" : "!"),
5528 (p->vmp_pmapped ? "" : "!"),
5529 (p->vmp_wpmapped ? "" : "!"));
5530 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
5531 (p->vmp_free_when_done ? "" : "!"),
5532 (p->vmp_absent ? "" : "!"),
5533 (p->vmp_error ? "" : "!"),
5534 (p->vmp_dirty ? "" : "!"),
5535 (p->vmp_cleaning ? "" : "!"),
5536 (p->vmp_precious ? "" : "!"),
5537 (p->vmp_clustered ? "" : "!"));
5538 printf(" %soverwriting, %srestart, %sunusual\n",
5539 (p->vmp_overwriting ? "" : "!"),
5540 (p->vmp_restart ? "" : "!"),
5541 (p->vmp_unusual ? "" : "!"));
5542 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
5543 (p->vmp_cs_validated ? "" : "!"),
5544 (p->vmp_cs_tainted ? "" : "!"),
5545 (p->vmp_cs_nx ? "" : "!"),
5546 (p->vmp_no_cache ? "" : "!"));
5547
5548 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
5549 }
5550
5551 /*
5552 * Check that the list of pages is ordered by
5553 * ascending physical address and has no holes.
5554 */
5555 static int
5556 vm_page_verify_contiguous(
5557 vm_page_t pages,
5558 unsigned int npages)
5559 {
5560 vm_page_t m;
5561 unsigned int page_count;
5562 vm_offset_t prev_addr;
5563
5564 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
5565 page_count = 1;
5566 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
5567 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5568 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
5569 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
5570 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
5571 panic("vm_page_verify_contiguous: not contiguous!");
5572 }
5573 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5574 ++page_count;
5575 }
5576 if (page_count != npages) {
5577 printf("pages %p actual count 0x%x but requested 0x%x\n",
5578 pages, page_count, npages);
5579 panic("vm_page_verify_contiguous: count error");
5580 }
5581 return 1;
5582 }
5583
5584
5585 /*
5586 * Check the free lists for proper length etc.
5587 */
5588 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
5589 static unsigned int
5590 vm_page_verify_free_list(
5591 vm_page_queue_head_t *vm_page_queue,
5592 unsigned int color,
5593 vm_page_t look_for_page,
5594 boolean_t expect_page)
5595 {
5596 unsigned int npages;
5597 vm_page_t m;
5598 vm_page_t prev_m;
5599 boolean_t found_page;
5600
5601 if (!vm_page_verify_this_free_list_enabled) {
5602 return 0;
5603 }
5604
5605 found_page = FALSE;
5606 npages = 0;
5607 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5608
5609 vm_page_queue_iterate(vm_page_queue, m, vmp_pageq) {
5610 if (m == look_for_page) {
5611 found_page = TRUE;
5612 }
5613 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m) {
5614 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
5615 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5616 }
5617 if (!m->vmp_busy) {
5618 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
5619 color, npages, m);
5620 }
5621 if (color != (unsigned int) -1) {
5622 if (VM_PAGE_GET_COLOR(m) != color) {
5623 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
5624 color, npages, m, VM_PAGE_GET_COLOR(m), color);
5625 }
5626 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q) {
5627 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
5628 color, npages, m, m->vmp_q_state);
5629 }
5630 } else {
5631 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q) {
5632 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
5633 npages, m, m->vmp_q_state);
5634 }
5635 }
5636 ++npages;
5637 prev_m = m;
5638 }
5639 if (look_for_page != VM_PAGE_NULL) {
5640 unsigned int other_color;
5641
5642 if (expect_page && !found_page) {
5643 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
5644 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5645 _vm_page_print(look_for_page);
5646 for (other_color = 0;
5647 other_color < vm_colors;
5648 other_color++) {
5649 if (other_color == color) {
5650 continue;
5651 }
5652 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
5653 other_color, look_for_page, FALSE);
5654 }
5655 if (color == (unsigned int) -1) {
5656 vm_page_verify_free_list(&vm_lopage_queue_free,
5657 (unsigned int) -1, look_for_page, FALSE);
5658 }
5659 panic("vm_page_verify_free_list(color=%u)\n", color);
5660 }
5661 if (!expect_page && found_page) {
5662 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
5663 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5664 }
5665 }
5666 return npages;
5667 }
5668
5669 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
5670 static void
5671 vm_page_verify_free_lists( void )
5672 {
5673 unsigned int color, npages, nlopages;
5674 boolean_t toggle = TRUE;
5675
5676 if (!vm_page_verify_all_free_lists_enabled) {
5677 return;
5678 }
5679
5680 npages = 0;
5681
5682 lck_mtx_lock(&vm_page_queue_free_lock);
5683
5684 if (vm_page_verify_this_free_list_enabled == TRUE) {
5685 /*
5686 * This variable has been set globally for extra checking of
5687 * each free list Q. Since we didn't set it, we don't own it
5688 * and we shouldn't toggle it.
5689 */
5690 toggle = FALSE;
5691 }
5692
5693 if (toggle == TRUE) {
5694 vm_page_verify_this_free_list_enabled = TRUE;
5695 }
5696
5697 for (color = 0; color < vm_colors; color++) {
5698 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
5699 color, VM_PAGE_NULL, FALSE);
5700 }
5701 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
5702 (unsigned int) -1,
5703 VM_PAGE_NULL, FALSE);
5704 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count) {
5705 panic("vm_page_verify_free_lists: "
5706 "npages %u free_count %d nlopages %u lo_free_count %u",
5707 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
5708 }
5709
5710 if (toggle == TRUE) {
5711 vm_page_verify_this_free_list_enabled = FALSE;
5712 }
5713
5714 lck_mtx_unlock(&vm_page_queue_free_lock);
5715 }
5716
5717 #endif /* MACH_ASSERT */
5718
5719
5720 extern boolean_t(*volatile consider_buffer_cache_collect)(int);
5721
5722 /*
5723 * CONTIGUOUS PAGE ALLOCATION
5724 *
5725 * Find a region large enough to contain at least n pages
5726 * of contiguous physical memory.
5727 *
5728 * This is done by traversing the vm_page_t array in a linear fashion.
5729 * We assume that the vm_page_t array has the available physical pages in an
5730 * ordered, ascending list... this is currently true of all our implementations
5731 * and must remain so... there can be 'holes' in the array... we also can
5732 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
5733 * which used to happen via 'vm_page_convert'... that function was no longer
5734 * being called and was removed...
5735 *
5736 * The basic flow consists of stabilizing some of the interesting state of
5737 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5738 * sweep at the beginning of the array looking for pages that meet our criteria
5739 * for a 'stealable' page... currently we are pretty conservative... if the page
5740 * meets these criteria and is physically contiguous to the previous page in the 'run'
5741 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5742 * and start to develop a new run... if at this point we've already considered
5743 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5744 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5745 * to other threads trying to acquire free pages (or move pages from q to q),
5746 * and then continue from the spot we left off... we only make 1 pass through the
5747 * array. Once we have a 'run' that is long enough, we'll go into the loop
5748 * which steals the pages from the queues they're currently on... pages on the free
5749 * queue can be stolen directly... pages that are on any of the other queues
5750 * must be removed from the object they are tabled on... this requires taking the
5751 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5752 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5753 * dump the pages we've currently stolen back to the free list, and pick up our
5754 * scan from the point where we aborted the 'current' run.
5755 *
5756 *
5757 * Requirements:
5758 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5759 *
5760 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5761 *
5762 * Algorithm: see the simplified sketch of the scan phase just below this comment.
5763 */
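/*
 * Simplified sketch of the scan phase only (illustrative, not compiled;
 * the helper name and parameters are hypothetical).  It walks an array of
 * ascending physical page numbers looking for a run of 'contig'
 * consecutive frames whose start satisfies 'pnum_mask'.  The real routine
 * below additionally stabilizes page state behind the queue/free locks,
 * yields periodically, and then steals the pages making up the run.
 */
#if 0
static int
find_contig_run(const ppnum_t *pnums, unsigned int count,
    unsigned int contig, ppnum_t pnum_mask)
{
	unsigned int i, run = 0, start = 0;

	for (i = 0; i < count; i++) {
		if (run == 0) {
			/* a run may only start on an alignment boundary */
			if ((pnums[i] & pnum_mask) != 0) {
				continue;
			}
			start = i;
			run = 1;
		} else if (pnums[i] == pnums[i - 1] + 1) {
			run++;                  /* physically contiguous: extend the run */
		} else if ((pnums[i] & pnum_mask) == 0) {
			start = i;              /* hole in physical addresses: restart here */
			run = 1;
		} else {
			run = 0;                /* hole and misaligned: reset */
		}
		if (run == contig) {
			return (int)start;      /* index of the first page of the run */
		}
	}
	return -1;                              /* no suitable run found */
}
#endif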
5764
5765 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5766
5767
5768 #define RESET_STATE_OF_RUN() \
5769 MACRO_BEGIN \
5770 prevcontaddr = -2; \
5771 start_pnum = -1; \
5772 free_considered = 0; \
5773 substitute_needed = 0; \
5774 npages = 0; \
5775 MACRO_END
5776
5777 /*
5778 * Can we steal in-use (i.e. not free) pages when searching for
5779 * physically-contiguous pages ?
5780 */
5781 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5782
5783 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5784 #if DEBUG
5785 int vm_page_find_contig_debug = 0;
5786 #endif
5787
5788 static vm_page_t
5789 vm_page_find_contiguous(
5790 unsigned int contig_pages,
5791 ppnum_t max_pnum,
5792 ppnum_t pnum_mask,
5793 boolean_t wire,
5794 int flags)
5795 {
5796 vm_page_t m = NULL;
5797 ppnum_t prevcontaddr = 0;
5798 ppnum_t start_pnum = 0;
5799 unsigned int npages = 0, considered = 0, scanned = 0;
5800 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5801 unsigned int idx_last_contig_page_found = 0;
5802 int free_considered = 0, free_available = 0;
5803 int substitute_needed = 0;
5804 boolean_t wrapped, zone_gc_called = FALSE;
5805 kern_return_t kr;
5806 #if DEBUG
5807 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5808 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5809 #endif
5810
5811 int yielded = 0;
5812 int dumped_run = 0;
5813 int stolen_pages = 0;
5814 int compressed_pages = 0;
5815
5816
5817 if (contig_pages == 0) {
5818 return VM_PAGE_NULL;
5819 }
5820
5821 full_scan_again:
5822
5823 #if MACH_ASSERT
5824 vm_page_verify_free_lists();
5825 #endif
5826 #if DEBUG
5827 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5828 #endif
5829 PAGE_REPLACEMENT_ALLOWED(TRUE);
5830
5831 /*
5832 * If there are still delayed pages, try to free up some that match.
5833 */
5834 if (__improbable(vm_delayed_count != 0 && contig_pages != 0)) {
5835 vm_free_delayed_pages_contig(contig_pages, max_pnum, pnum_mask);
5836 }
5837
5838 vm_page_lock_queues();
5839 lck_mtx_lock(&vm_page_queue_free_lock);
5840
5841 RESET_STATE_OF_RUN();
5842
5843 scanned = 0;
5844 considered = 0;
5845 free_available = vm_page_free_count - vm_page_free_reserved;
5846
5847 wrapped = FALSE;
5848
5849 if (flags & KMA_LOMEM) {
5850 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5851 } else {
5852 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5853 }
5854
5855 orig_last_idx = idx_last_contig_page_found;
5856 last_idx = orig_last_idx;
5857
5858 for (page_idx = last_idx, start_idx = last_idx;
5859 npages < contig_pages && page_idx < vm_pages_count;
5860 page_idx++) {
5861 retry:
5862 if (wrapped &&
5863 npages == 0 &&
5864 page_idx >= orig_last_idx) {
5865 /*
5866 * We're back where we started and we haven't
5867 * found any suitable contiguous range. Let's
5868 * give up.
5869 */
5870 break;
5871 }
5872 scanned++;
5873 m = &vm_pages[page_idx];
5874
5875 assert(!m->vmp_fictitious);
5876 assert(!m->vmp_private);
5877
5878 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5879 /* no more low pages... */
5880 break;
5881 }
5882 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5883 /*
5884 * not aligned
5885 */
5886 RESET_STATE_OF_RUN();
5887 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
5888 m->vmp_laundry || m->vmp_wanted ||
5889 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
5890 /*
5891 * page is in a transient state
5892 * or a state we don't want to deal
5893 * with, so don't consider it which
5894 * means starting a new run
5895 */
5896 RESET_STATE_OF_RUN();
5897 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
5898 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5899 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5900 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5901 /*
5902 * page needs to be on one of our queues (other than the pageout or special free queues)
5903 * or it needs to belong to the compressor pool (which is now indicated
5904 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5905 * from the check for VM_PAGE_NOT_ON_Q)
5906 * in order for it to be stable behind the
5907 * locks we hold at this point...
5908 * if not, don't consider it which
5909 * means starting a new run
5910 */
5911 RESET_STATE_OF_RUN();
5912 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
5913 /*
5914 * pages on the free list are always 'busy'
5915 * so we couldn't test for 'busy' in the check
5916 * for the transient states... pages that are
5917 * 'free' are never 'tabled', so we also couldn't
5918 * test for 'tabled'. So we check here to make
5919 * sure that a non-free page is not busy and is
5920 * tabled on an object...
5921 * if not, don't consider it which
5922 * means starting a new run
5923 */
5924 RESET_STATE_OF_RUN();
5925 } else {
5926 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5927 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5928 RESET_STATE_OF_RUN();
5929 goto did_consider;
5930 } else {
5931 npages = 1;
5932 start_idx = page_idx;
5933 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5934 }
5935 } else {
5936 npages++;
5937 }
5938 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5939
5940 VM_PAGE_CHECK(m);
5941 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5942 free_considered++;
5943 } else {
5944 /*
5945 * This page is not free.
5946 * If we can't steal used pages,
5947 * we have to give up this run
5948 * and keep looking.
5949 * Otherwise, we might need to
5950 * move the contents of this page
5951 * into a substitute page.
5952 */
5953 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5954 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
5955 substitute_needed++;
5956 }
5957 #else
5958 RESET_STATE_OF_RUN();
5959 #endif
5960 }
5961
5962 if ((free_considered + substitute_needed) > free_available) {
5963 /*
5964 * if we let this run continue
5965 * we will end up dropping the vm_page_free_count
5966 * below the reserve limit... we need to abort
5967 * this run, but we can at least re-consider this
5968 * page... thus the jump back to 'retry'
5969 */
5970 RESET_STATE_OF_RUN();
5971
5972 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5973 considered++;
5974 goto retry;
5975 }
5976 /*
5977 * free_available == 0
5978 * so can't consider any free pages... if
5979 * we went to retry in this case, we'd
5980 * get stuck looking at the same page
5981 * w/o making any forward progress
5982 * we also want to take this path if we've already
5983 * reached our limit that controls the lock latency
5984 */
5985 }
5986 }
5987 did_consider:
5988 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5989 PAGE_REPLACEMENT_ALLOWED(FALSE);
5990
5991 lck_mtx_unlock(&vm_page_queue_free_lock);
5992 vm_page_unlock_queues();
5993
5994 mutex_pause(0);
5995
5996 PAGE_REPLACEMENT_ALLOWED(TRUE);
5997
5998 vm_page_lock_queues();
5999 lck_mtx_lock(&vm_page_queue_free_lock);
6000
6001 RESET_STATE_OF_RUN();
6002 /*
6003 * reset our free page limit since we
6004 * dropped the lock protecting the vm_page_free_queue
6005 */
6006 free_available = vm_page_free_count - vm_page_free_reserved;
6007 considered = 0;
6008
6009 yielded++;
6010
6011 goto retry;
6012 }
6013 considered++;
6014 }
6015 m = VM_PAGE_NULL;
6016
6017 if (npages != contig_pages) {
6018 if (!wrapped) {
6019 /*
6020 * We didn't find a contiguous range but we didn't
6021 * start from the very first page.
6022 * Start again from the very first page.
6023 */
6024 RESET_STATE_OF_RUN();
6025 if (flags & KMA_LOMEM) {
6026 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
6027 } else {
6028 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
6029 }
6030 last_idx = 0;
6031 page_idx = last_idx;
6032 wrapped = TRUE;
6033 goto retry;
6034 }
6035 lck_mtx_unlock(&vm_page_queue_free_lock);
6036 } else {
6037 vm_page_t m1;
6038 vm_page_t m2;
6039 unsigned int cur_idx;
6040 unsigned int tmp_start_idx;
6041 vm_object_t locked_object = VM_OBJECT_NULL;
6042 boolean_t abort_run = FALSE;
6043
6044 assert(page_idx - start_idx == contig_pages);
6045
6046 tmp_start_idx = start_idx;
6047
6048 /*
6049 * first pass through to pull the free pages
6050 * off of the free queue so that in case we
6051 * need substitute pages, we won't grab any
6052 * of the free pages in the run... we'll clear
6053 * the 'free' bit in the 2nd pass, and even in
6054 * an abort_run case, we'll collect all of the
6055 * free pages in this run and return them to the free list
6056 */
6057 while (start_idx < page_idx) {
6058 m1 = &vm_pages[start_idx++];
6059
6060 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
6061 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
6062 #endif
6063
6064 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
6065 unsigned int color;
6066
6067 color = VM_PAGE_GET_COLOR(m1);
6068 #if MACH_ASSERT
6069 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
6070 #endif
6071 vm_page_queue_remove(&vm_page_queue_free[color].qhead, m1, vmp_pageq);
6072
6073 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
6074 #if MACH_ASSERT
6075 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
6076 #endif
6077 /*
6078 * Clear the "free" bit so that this page
6079 * does not get considered for another
6080 * concurrent physically-contiguous allocation.
6081 */
6082 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
6083 assert(m1->vmp_busy);
6084
6085 vm_page_free_count--;
6086 }
6087 }
6088 if (flags & KMA_LOMEM) {
6089 vm_page_lomem_find_contiguous_last_idx = page_idx;
6090 } else {
6091 vm_page_find_contiguous_last_idx = page_idx;
6092 }
6093
6094 /*
6095 * we can drop the free queue lock at this point since
6096 * we've pulled any 'free' candidates off of the list
6097 * we need it dropped so that we can do a vm_page_grab
6098 * when substituting for pmapped/dirty pages
6099 */
6100 lck_mtx_unlock(&vm_page_queue_free_lock);
6101
6102 start_idx = tmp_start_idx;
6103 cur_idx = page_idx - 1;
6104
6105 while (start_idx++ < page_idx) {
6106 /*
6107 * must go through the list from back to front
6108 * so that the page list is created in the
6109 * correct order - low -> high phys addresses
6110 */
6111 m1 = &vm_pages[cur_idx--];
6112
6113 if (m1->vmp_object == 0) {
6114 /*
6115 * page has already been removed from
6116 * the free list in the 1st pass
6117 */
6118 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6119 assert(m1->vmp_offset == (vm_object_offset_t) -1);
6120 assert(m1->vmp_busy);
6121 assert(!m1->vmp_wanted);
6122 assert(!m1->vmp_laundry);
6123 } else {
6124 vm_object_t object;
6125 int refmod;
6126 boolean_t disconnected, reusable;
6127
6128 if (abort_run == TRUE) {
6129 continue;
6130 }
6131
6132 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
6133
6134 object = VM_PAGE_OBJECT(m1);
6135
6136 if (object != locked_object) {
6137 if (locked_object) {
6138 vm_object_unlock(locked_object);
6139 locked_object = VM_OBJECT_NULL;
6140 }
6141 if (vm_object_lock_try(object)) {
6142 locked_object = object;
6143 }
6144 }
6145 if (locked_object == VM_OBJECT_NULL ||
6146 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
6147 m1->vmp_laundry || m1->vmp_wanted ||
6148 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
6149 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
6150 if (locked_object) {
6151 vm_object_unlock(locked_object);
6152 locked_object = VM_OBJECT_NULL;
6153 }
6154 tmp_start_idx = cur_idx;
6155 abort_run = TRUE;
6156 continue;
6157 }
6158
6159 disconnected = FALSE;
6160 reusable = FALSE;
6161
6162 if ((m1->vmp_reusable ||
6163 object->all_reusable) &&
6164 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
6165 !m1->vmp_dirty &&
6166 !m1->vmp_reference) {
6167 /* reusable page... */
6168 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
6169 disconnected = TRUE;
6170 if (refmod == 0) {
6171 /*
6172 * ... not reused: can steal
6173 * without relocating contents.
6174 */
6175 reusable = TRUE;
6176 }
6177 }
6178
6179 if ((m1->vmp_pmapped &&
6180 !reusable) ||
6181 m1->vmp_dirty ||
6182 m1->vmp_precious) {
6183 vm_object_offset_t offset;
6184
6185 m2 = vm_page_grab_options(VM_PAGE_GRAB_Q_LOCK_HELD);
6186
6187 if (m2 == VM_PAGE_NULL) {
6188 if (locked_object) {
6189 vm_object_unlock(locked_object);
6190 locked_object = VM_OBJECT_NULL;
6191 }
6192 tmp_start_idx = cur_idx;
6193 abort_run = TRUE;
6194 continue;
6195 }
6196 if (!disconnected) {
6197 if (m1->vmp_pmapped) {
6198 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
6199 } else {
6200 refmod = 0;
6201 }
6202 }
6203
6204 /* copy the page's contents */
6205 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
6206 /* copy the page's state */
6207 assert(!VM_PAGE_WIRED(m1));
6208 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
6209 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
6210 assert(!m1->vmp_laundry);
6211 m2->vmp_reference = m1->vmp_reference;
6212 assert(!m1->vmp_gobbled);
6213 assert(!m1->vmp_private);
6214 m2->vmp_no_cache = m1->vmp_no_cache;
6215 m2->vmp_xpmapped = 0;
6216 assert(!m1->vmp_busy);
6217 assert(!m1->vmp_wanted);
6218 assert(!m1->vmp_fictitious);
6219 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
6220 m2->vmp_wpmapped = m1->vmp_wpmapped;
6221 assert(!m1->vmp_free_when_done);
6222 m2->vmp_absent = m1->vmp_absent;
6223 m2->vmp_error = m1->vmp_error;
6224 m2->vmp_dirty = m1->vmp_dirty;
6225 assert(!m1->vmp_cleaning);
6226 m2->vmp_precious = m1->vmp_precious;
6227 m2->vmp_clustered = m1->vmp_clustered;
6228 assert(!m1->vmp_overwriting);
6229 m2->vmp_restart = m1->vmp_restart;
6230 m2->vmp_unusual = m1->vmp_unusual;
6231 m2->vmp_cs_validated = m1->vmp_cs_validated;
6232 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
6233 m2->vmp_cs_nx = m1->vmp_cs_nx;
6234
6235 /*
6236 * If m1 had really been reusable,
6237 * we would have just stolen it, so
6238 * let's not propagate its "reusable"
6239 * bit and assert that m2 is not
6240 * marked as "reusable".
6241 */
6242 // m2->vmp_reusable = m1->vmp_reusable;
6243 assert(!m2->vmp_reusable);
6244
6245 // assert(!m1->vmp_lopage);
6246
6247 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6248 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
6249 }
6250
6251 /*
6252 * page may need to be flushed if
6253 * it is marshalled into a UPL
6254 * that is going to be used by a device
6255 * that doesn't support coherency
6256 */
6257 m2->vmp_written_by_kernel = TRUE;
6258
6259 /*
6260 * make sure we clear the ref/mod state
6261 * from the pmap layer... else we risk
6262 * inheriting state from the last time
6263 * this page was used...
6264 */
6265 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
6266
6267 if (refmod & VM_MEM_REFERENCED) {
6268 m2->vmp_reference = TRUE;
6269 }
6270 if (refmod & VM_MEM_MODIFIED) {
6271 SET_PAGE_DIRTY(m2, TRUE);
6272 }
6273 offset = m1->vmp_offset;
6274
6275 /*
6276 * completely cleans up the state
6277 * of the page so that it is ready
6278 * to be put onto the free list, or
6279 * for this purpose it looks like it
6280 * just came off of the free list
6281 */
6282 vm_page_free_prepare(m1);
6283
6284 /*
6285 * now put the substitute page
6286 * on the object
6287 */
6288 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
6289
6290 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6291 m2->vmp_pmapped = TRUE;
6292 m2->vmp_wpmapped = TRUE;
6293
6294 PMAP_ENTER(kernel_pmap, m2->vmp_offset, m2,
6295 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
6296
6297 assert(kr == KERN_SUCCESS);
6298
6299 compressed_pages++;
6300 } else {
6301 if (m2->vmp_reference) {
6302 vm_page_activate(m2);
6303 } else {
6304 vm_page_deactivate(m2);
6305 }
6306 }
6307 PAGE_WAKEUP_DONE(m2);
6308 } else {
6309 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6310
6311 /*
6312 * completely cleans up the state
6313 * of the page so that it is ready
6314 * to be put onto the free list, or
6315 * for this purpose it looks like it
6316 * just came off of the free list
6317 */
6318 vm_page_free_prepare(m1);
6319 }
6320
6321 stolen_pages++;
6322 }
6323 #if CONFIG_BACKGROUND_QUEUE
6324 vm_page_assign_background_state(m1);
6325 #endif
6326 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
6327 m1->vmp_snext = m;
6328 m = m1;
6329 }
6330 if (locked_object) {
6331 vm_object_unlock(locked_object);
6332 locked_object = VM_OBJECT_NULL;
6333 }
6334
6335 if (abort_run == TRUE) {
6336 /*
6337 * want the index of the last
6338 * page in this run that was
6339 * successfully 'stolen', so back
6340 * it up 1 for the auto-decrement on use
6341 * and 1 more to bump back over this page
6342 */
6343 page_idx = tmp_start_idx + 2;
6344 if (page_idx >= vm_pages_count) {
6345 if (wrapped) {
6346 if (m != VM_PAGE_NULL) {
6347 vm_page_unlock_queues();
6348 vm_page_free_list(m, FALSE);
6349 vm_page_lock_queues();
6350 m = VM_PAGE_NULL;
6351 }
6352 dumped_run++;
6353 goto done_scanning;
6354 }
6355 page_idx = last_idx = 0;
6356 wrapped = TRUE;
6357 }
6358 abort_run = FALSE;
6359
6360 /*
6361 * We didn't find a contiguous range but we didn't
6362 * start from the very first page.
6363 * Start again from the very first page.
6364 */
6365 RESET_STATE_OF_RUN();
6366
6367 if (flags & KMA_LOMEM) {
6368 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
6369 } else {
6370 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
6371 }
6372
6373 last_idx = page_idx;
6374
6375 if (m != VM_PAGE_NULL) {
6376 vm_page_unlock_queues();
6377 vm_page_free_list(m, FALSE);
6378 vm_page_lock_queues();
6379 m = VM_PAGE_NULL;
6380 }
6381 dumped_run++;
6382
6383 lck_mtx_lock(&vm_page_queue_free_lock);
6384 /*
6385 * reset our free page limit since we
6386 * dropped the lock protecting the vm_page_free_queue
6387 */
6388 free_available = vm_page_free_count - vm_page_free_reserved;
6389 goto retry;
6390 }
6391
6392 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
6393 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6394 assert(m1->vmp_wire_count == 0);
6395
6396 if (wire == TRUE) {
6397 m1->vmp_wire_count++;
6398 m1->vmp_q_state = VM_PAGE_IS_WIRED;
6399 } else {
6400 m1->vmp_gobbled = TRUE;
6401 }
6402 }
6403 if (wire == FALSE) {
6404 vm_page_gobble_count += npages;
6405 }
6406
6407 /*
6408 * gobbled pages are also counted as wired pages
6409 */
6410 vm_page_wire_count += npages;
6411
6412 assert(vm_page_verify_contiguous(m, npages));
6413 }
6414 done_scanning:
6415 PAGE_REPLACEMENT_ALLOWED(FALSE);
6416
6417 vm_page_unlock_queues();
6418
6419 #if DEBUG
6420 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
6421
6422 tv_end_sec -= tv_start_sec;
6423 if (tv_end_usec < tv_start_usec) {
6424 tv_end_sec--;
6425 tv_end_usec += 1000000;
6426 }
6427 tv_end_usec -= tv_start_usec;
6428 if (tv_end_usec >= 1000000) {
6429 tv_end_sec++;
6430 tv_end_usec -= 1000000;
6431 }
6432 if (vm_page_find_contig_debug) {
6433 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
6434 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6435 (long)tv_end_sec, tv_end_usec, orig_last_idx,
6436 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
6437 }
6438
6439 #endif
6440 #if MACH_ASSERT
6441 vm_page_verify_free_lists();
6442 #endif
6443 if (m == NULL && zone_gc_called == FALSE) {
6444 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
6445 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6446 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
6447
6448 if (consider_buffer_cache_collect != NULL) {
6449 (void)(*consider_buffer_cache_collect)(1);
6450 }
6451
6452 consider_zone_gc(FALSE);
6453
6454 zone_gc_called = TRUE;
6455
6456 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
6457 goto full_scan_again;
6458 }
6459
6460 return m;
6461 }
6462
6463 /*
6464 * Allocate a list of contiguous, wired pages.
6465 */
6466 kern_return_t
6467 cpm_allocate(
6468 vm_size_t size,
6469 vm_page_t *list,
6470 ppnum_t max_pnum,
6471 ppnum_t pnum_mask,
6472 boolean_t wire,
6473 int flags)
6474 {
6475 vm_page_t pages;
6476 unsigned int npages;
6477
6478 if (size % PAGE_SIZE != 0) {
6479 return KERN_INVALID_ARGUMENT;
6480 }
6481
6482 npages = (unsigned int) (size / PAGE_SIZE);
6483 if (npages != size / PAGE_SIZE) {
6484 /* 32-bit overflow */
6485 return KERN_INVALID_ARGUMENT;
6486 }
6487
6488 /*
6489 * Obtain a pointer to a subset of the free
6490 * list large enough to satisfy the request;
6491 * the region will be physically contiguous.
6492 */
6493 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
6494
6495 if (pages == VM_PAGE_NULL) {
6496 return KERN_NO_SPACE;
6497 }
6498 /*
6499 * determine need for wakeups
6500 */
6501 if (vm_page_free_count < vm_page_free_min) {
6502 lck_mtx_lock(&vm_page_queue_free_lock);
6503 if (vm_pageout_running == FALSE) {
6504 lck_mtx_unlock(&vm_page_queue_free_lock);
6505 thread_wakeup((event_t) &vm_page_free_wanted);
6506 } else {
6507 lck_mtx_unlock(&vm_page_queue_free_lock);
6508 }
6509 }
6510
6511 VM_CHECK_MEMORYSTATUS;
6512
6513 /*
6514 * The CPM pages should now be available and
6515 * ordered by ascending physical address.
6516 */
6517 assert(vm_page_verify_contiguous(pages, npages));
6518
6519 *list = pages;
6520 return KERN_SUCCESS;
6521 }
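/*
 * Illustrative example (not compiled): a caller that needs 16 wired,
 * physically contiguous pages residing below 4GB, with no alignment
 * constraint.  The variable names are hypothetical; KERN_NO_SPACE means
 * no suitable contiguous run could be found.
 */
#if 0
vm_page_t       page_run;
kern_return_t   kr;

kr = cpm_allocate(16 * PAGE_SIZE,                       /* size, page aligned */
    &page_run,                                          /* returned list, ascending phys addrs */
    (ppnum_t)(((1ULL << 32) >> PAGE_SHIFT) - 1),        /* max_pnum: stay below 4GB */
    0,                                                  /* pnum_mask: no alignment requirement */
    TRUE,                                               /* wire the pages */
    0);                                                 /* flags (KMA_LOMEM not needed here) */
#endif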
6522
6523
6524 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6525
6526 /*
6527 * when working on a 'run' of pages, it is necessary to hold
6528 * the vm_page_queue_lock (a hot global lock) for certain operations
6529 * on the page... however, the majority of the work can be done
6530 * while merely holding the object lock... in fact there are certain
6531 * collections of pages that don't require any work brokered by the
6532 * vm_page_queue_lock... to mitigate the time spent behind the global
6533 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
6534 * while doing all of the work that doesn't require the vm_page_queue_lock...
6535 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
6536 * necessary work for each page... we will grab the busy bit on the page
6537 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
6538 * if it can't immediately take the vm_page_queue_lock in order to compete
6539 * for the locks in the same order that vm_pageout_scan takes them.
6540 * the operation names are modeled after the names of the routines that
6541 * need to be called in order to make the changes very obvious in the
6542 * original loop
6543 */
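/*
 * Illustrative sketch of the pattern described above (not compiled; the
 * object page iteration, batch size and work mask are assumptions, not a
 * copy of any real caller).  Per-page work is recorded in dw_m/dw_mask,
 * the fields consumed below, while only the object lock is held; then
 * vm_page_do_delayed_work() takes the page queue lock once per batch.
 * Since the routine may drop and retake the object lock, a real caller
 * has to be prepared for the object's page list to change underneath it.
 */
#if 0
struct vm_page_delayed_work     dw_array[DEFAULT_DELAYED_WORK_LIMIT];
struct vm_page_delayed_work     *dwp = &dw_array[0];
int                             dw_count = 0;
vm_page_t                       m;

vm_object_lock(object);
vm_page_queue_iterate(&object->memq, m, vmp_listq) {
	/* ... work that only needs the object lock ... */
	dwp->dw_m = m;
	/* real callers usually also take the busy bit and add
	 * DW_clear_busy | DW_PAGE_WAKEUP to the mask */
	dwp->dw_mask = DW_vm_page_activate;
	dwp++;
	dw_count++;

	if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
		    &dw_array[0], dw_count);
		dwp = &dw_array[0];
		dw_count = 0;
	}
}
if (dw_count) {
	vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
	    &dw_array[0], dw_count);
}
vm_object_unlock(object);
#endif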
6544
6545 void
6546 vm_page_do_delayed_work(
6547 vm_object_t object,
6548 vm_tag_t tag,
6549 struct vm_page_delayed_work *dwp,
6550 int dw_count)
6551 {
6552 int j;
6553 vm_page_t m;
6554 vm_page_t local_free_q = VM_PAGE_NULL;
6555
6556 /*
6557 * pageout_scan takes the vm_page_lock_queues first
6558 * then tries for the object lock... to avoid what
6559 * is effectively a lock inversion, we'll go to the
6560 * trouble of taking them in that same order... otherwise
6561 * if this object contains the majority of the pages resident
6562 * in the UBC (or a small set of large objects actively being
6563 * worked on contain the majority of the pages), we could
6564 * cause the pageout_scan thread to 'starve' in its attempt
6565 * to find pages to move to the free queue, since it has to
6566 * successfully acquire the object lock of any candidate page
6567 * before it can steal/clean it.
6568 */
6569 if (!vm_page_trylockspin_queues()) {
6570 vm_object_unlock(object);
6571
6572 /*
6573 * "Turnstile enabled vm_pageout_scan" can be runnable
6574 * for a very long time without getting on a core.
6575 * If this is a higher priority thread it could be
6576 * waiting here for a very long time respecting the fact
6577 * that pageout_scan would like its object after VPS does
6578 * a mutex_pause(0).
6579 * So we cap the number of yields in the vm_object_lock_avoid()
6580 * case to a single mutex_pause(0) which will give vm_pageout_scan
6581 * 10us to run and grab the object if needed.
6582 */
6583 vm_page_lockspin_queues();
6584
6585 for (j = 0;; j++) {
6586 if ((!vm_object_lock_avoid(object) ||
6587 (vps_dynamic_priority_enabled && (j > 0))) &&
6588 _vm_object_lock_try(object)) {
6589 break;
6590 }
6591 vm_page_unlock_queues();
6592 mutex_pause(j);
6593 vm_page_lockspin_queues();
6594 }
6595 }
6596 for (j = 0; j < dw_count; j++, dwp++) {
6597 m = dwp->dw_m;
6598
6599 if (dwp->dw_mask & DW_vm_pageout_throttle_up) {
6600 vm_pageout_throttle_up(m);
6601 }
6602 #if CONFIG_PHANTOM_CACHE
6603 if (dwp->dw_mask & DW_vm_phantom_cache_update) {
6604 vm_phantom_cache_update(m);
6605 }
6606 #endif
6607 if (dwp->dw_mask & DW_vm_page_wire) {
6608 vm_page_wire(m, tag, FALSE);
6609 } else if (dwp->dw_mask & DW_vm_page_unwire) {
6610 boolean_t queueit;
6611
6612 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6613
6614 vm_page_unwire(m, queueit);
6615 }
6616 if (dwp->dw_mask & DW_vm_page_free) {
6617 vm_page_free_prepare_queues(m);
6618
6619 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6620 /*
6621 * Add this page to our list of reclaimed pages,
6622 * to be freed later.
6623 */
6624 m->vmp_snext = local_free_q;
6625 local_free_q = m;
6626 } else {
6627 if (dwp->dw_mask & DW_vm_page_deactivate_internal) {
6628 vm_page_deactivate_internal(m, FALSE);
6629 } else if (dwp->dw_mask & DW_vm_page_activate) {
6630 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6631 vm_page_activate(m);
6632 }
6633 } else if (dwp->dw_mask & DW_vm_page_speculate) {
6634 vm_page_speculate(m, TRUE);
6635 } else if (dwp->dw_mask & DW_enqueue_cleaned) {
6636 /*
6637 * if we didn't hold the object lock and did this,
6638 * we might disconnect the page, then someone might
6639 * soft fault it back in, then we would put it on the
6640 * cleaned queue, and so we would have a referenced (maybe even dirty)
6641 * page on that queue, which we don't want
6642 */
6643 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6644
6645 if ((refmod_state & VM_MEM_REFERENCED)) {
6646 /*
6647 * this page has been touched since it got cleaned; let's activate it
6648 * if it hasn't already been
6649 */
6650 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
6651 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
6652
6653 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6654 vm_page_activate(m);
6655 }
6656 } else {
6657 m->vmp_reference = FALSE;
6658 vm_page_enqueue_cleaned(m);
6659 }
6660 } else if (dwp->dw_mask & DW_vm_page_lru) {
6661 vm_page_lru(m);
6662 } else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
6663 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6664 vm_page_queues_remove(m, TRUE);
6665 }
6666 }
6667 if (dwp->dw_mask & DW_set_reference) {
6668 m->vmp_reference = TRUE;
6669 } else if (dwp->dw_mask & DW_clear_reference) {
6670 m->vmp_reference = FALSE;
6671 }
6672
6673 if (dwp->dw_mask & DW_move_page) {
6674 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6675 vm_page_queues_remove(m, FALSE);
6676
6677 assert(VM_PAGE_OBJECT(m) != kernel_object);
6678
6679 vm_page_enqueue_inactive(m, FALSE);
6680 }
6681 }
6682 if (dwp->dw_mask & DW_clear_busy) {
6683 m->vmp_busy = FALSE;
6684 }
6685
6686 if (dwp->dw_mask & DW_PAGE_WAKEUP) {
6687 PAGE_WAKEUP(m);
6688 }
6689 }
6690 }
6691 vm_page_unlock_queues();
6692
6693 if (local_free_q) {
6694 vm_page_free_list(local_free_q, TRUE);
6695 }
6696
6697 VM_CHECK_MEMORYSTATUS;
6698 }
6699
6700 kern_return_t
6701 vm_page_alloc_list(
6702 int page_count,
6703 int flags,
6704 vm_page_t *list)
6705 {
6706 vm_page_t lo_page_list = VM_PAGE_NULL;
6707 vm_page_t mem;
6708 int i;
6709
6710 if (!(flags & KMA_LOMEM)) {
6711 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6712 }
6713
6714 for (i = 0; i < page_count; i++) {
6715 mem = vm_page_grablo();
6716
6717 if (mem == VM_PAGE_NULL) {
6718 if (lo_page_list) {
6719 vm_page_free_list(lo_page_list, FALSE);
6720 }
6721
6722 *list = VM_PAGE_NULL;
6723
6724 return KERN_RESOURCE_SHORTAGE;
6725 }
6726 mem->vmp_snext = lo_page_list;
6727 lo_page_list = mem;
6728 }
6729 *list = lo_page_list;
6730
6731 return KERN_SUCCESS;
6732 }
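/*
 * Illustrative example (not compiled): grab a small batch of low-memory
 * pages and release them again with vm_page_free_list().  The count and
 * variable name are hypothetical; the pages come back as a singly linked
 * list chained through vmp_snext (see vm_page_get_next() below).
 */
#if 0
vm_page_t lo_pages;

if (vm_page_alloc_list(8, KMA_LOMEM, &lo_pages) == KERN_SUCCESS) {
	/* ... use the pages ... */
	vm_page_free_list(lo_pages, FALSE);
}
#endif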
6733
6734 void
6735 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6736 {
6737 page->vmp_offset = offset;
6738 }
6739
6740 vm_page_t
6741 vm_page_get_next(vm_page_t page)
6742 {
6743 return page->vmp_snext;
6744 }
6745
6746 vm_object_offset_t
6747 vm_page_get_offset(vm_page_t page)
6748 {
6749 return page->vmp_offset;
6750 }
6751
6752 ppnum_t
6753 vm_page_get_phys_page(vm_page_t page)
6754 {
6755 return VM_PAGE_GET_PHYS_PAGE(page);
6756 }
6757
6758
6759 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6760
6761 #if HIBERNATION
6762
6763 static vm_page_t hibernate_gobble_queue;
6764
6765 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6766 static int hibernate_flush_dirty_pages(int);
6767 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6768
6769 void hibernate_flush_wait(void);
6770 void hibernate_mark_in_progress(void);
6771 void hibernate_clear_in_progress(void);
6772
6773 void hibernate_free_range(int, int);
6774 void hibernate_hash_insert_page(vm_page_t);
6775 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6776 void hibernate_rebuild_vm_structs(void);
6777 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6778 ppnum_t hibernate_lookup_paddr(unsigned int);
6779
6780 struct hibernate_statistics {
6781 int hibernate_considered;
6782 int hibernate_reentered_on_q;
6783 int hibernate_found_dirty;
6784 int hibernate_skipped_cleaning;
6785 int hibernate_skipped_transient;
6786 int hibernate_skipped_precious;
6787 int hibernate_skipped_external;
6788 int hibernate_queue_nolock;
6789 int hibernate_queue_paused;
6790 int hibernate_throttled;
6791 int hibernate_throttle_timeout;
6792 int hibernate_drained;
6793 int hibernate_drain_timeout;
6794 int cd_lock_failed;
6795 int cd_found_precious;
6796 int cd_found_wired;
6797 int cd_found_busy;
6798 int cd_found_unusual;
6799 int cd_found_cleaning;
6800 int cd_found_laundry;
6801 int cd_found_dirty;
6802 int cd_found_xpmapped;
6803 int cd_skipped_xpmapped;
6804 int cd_local_free;
6805 int cd_total_free;
6806 int cd_vm_page_wire_count;
6807 int cd_vm_struct_pages_unneeded;
6808 int cd_pages;
6809 int cd_discarded;
6810 int cd_count_wire;
6811 } hibernate_stats;
6812
6813
6814 /*
6815 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6816 * so that we don't overrun the estimated image size, which would
6817 * result in a hibernation failure.
6818 */
6819 #define HIBERNATE_XPMAPPED_LIMIT 40000
6820
6821
6822 static int
6823 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6824 {
6825 wait_result_t wait_result;
6826
6827 vm_page_lock_queues();
6828
6829 while (!vm_page_queue_empty(&q->pgo_pending)) {
6830 q->pgo_draining = TRUE;
6831
6832 assert_wait_timeout((event_t) (&q->pgo_laundry + 1), THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);
6833
6834 vm_page_unlock_queues();
6835
6836 wait_result = thread_block(THREAD_CONTINUE_NULL);
6837
6838 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6839 hibernate_stats.hibernate_drain_timeout++;
6840
6841 if (q == &vm_pageout_queue_external) {
6842 return 0;
6843 }
6844
6845 return 1;
6846 }
6847 vm_page_lock_queues();
6848
6849 hibernate_stats.hibernate_drained++;
6850 }
6851 vm_page_unlock_queues();
6852
6853 return 0;
6854 }
6855
6856
6857 boolean_t hibernate_skip_external = FALSE;
6858
6859 static int
6860 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6861 {
6862 vm_page_t m;
6863 vm_object_t l_object = NULL;
6864 vm_object_t m_object = NULL;
6865 int refmod_state = 0;
6866 int try_failed_count = 0;
6867 int retval = 0;
6868 int current_run = 0;
6869 struct vm_pageout_queue *iq;
6870 struct vm_pageout_queue *eq;
6871 struct vm_pageout_queue *tq;
6872
6873 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6874 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6875
6876 iq = &vm_pageout_queue_internal;
6877 eq = &vm_pageout_queue_external;
6878
6879 vm_page_lock_queues();
6880
6881 while (qcount && !vm_page_queue_empty(q)) {
6882 if (current_run++ == 1000) {
6883 if (hibernate_should_abort()) {
6884 retval = 1;
6885 break;
6886 }
6887 current_run = 0;
6888 }
6889
6890 m = (vm_page_t) vm_page_queue_first(q);
6891 m_object = VM_PAGE_OBJECT(m);
6892
6893 /*
6894 * check to see if we currently are working
6895 * with the same object... if so, we've
6896 * already got the lock
6897 */
6898 if (m_object != l_object) {
6899 /*
6900 * the object associated with the candidate page is
6901 * different from the one we were just working
6902 * with... dump the lock if we still own it
6903 */
6904 if (l_object != NULL) {
6905 vm_object_unlock(l_object);
6906 l_object = NULL;
6907 }
6908 /*
6909 * Try to lock object; since we've already got the
6910 * page queues lock, we can only 'try' for this one.
6911 * if the 'try' fails, we need to do a mutex_pause
6912 * to allow the owner of the object lock a chance to
6913 * run...
6914 */
6915 if (!vm_object_lock_try_scan(m_object)) {
6916 if (try_failed_count > 20) {
6917 hibernate_stats.hibernate_queue_nolock++;
6918
6919 goto reenter_pg_on_q;
6920 }
6921
6922 vm_page_unlock_queues();
6923 mutex_pause(try_failed_count++);
6924 vm_page_lock_queues();
6925
6926 hibernate_stats.hibernate_queue_paused++;
6927 continue;
6928 } else {
6929 l_object = m_object;
6930 }
6931 }
6932 if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error) {
6933 /*
6934 * page is not to be cleaned
6935 * put it back on the head of its queue
6936 */
6937 if (m->vmp_cleaning) {
6938 hibernate_stats.hibernate_skipped_cleaning++;
6939 } else {
6940 hibernate_stats.hibernate_skipped_transient++;
6941 }
6942
6943 goto reenter_pg_on_q;
6944 }
6945 if (m_object->copy == VM_OBJECT_NULL) {
6946 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6947 /*
6948 * let the normal hibernate image path
6949 * deal with these
6950 */
6951 goto reenter_pg_on_q;
6952 }
6953 }
6954 if (!m->vmp_dirty && m->vmp_pmapped) {
6955 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6956
6957 if ((refmod_state & VM_MEM_MODIFIED)) {
6958 SET_PAGE_DIRTY(m, FALSE);
6959 }
6960 } else {
6961 refmod_state = 0;
6962 }
6963
6964 if (!m->vmp_dirty) {
6965 /*
6966 * page is not to be cleaned
6967 * put it back on the head of its queue
6968 */
6969 if (m->vmp_precious) {
6970 hibernate_stats.hibernate_skipped_precious++;
6971 }
6972
6973 goto reenter_pg_on_q;
6974 }
6975
6976 if (hibernate_skip_external == TRUE && !m_object->internal) {
6977 hibernate_stats.hibernate_skipped_external++;
6978
6979 goto reenter_pg_on_q;
6980 }
6981 tq = NULL;
6982
6983 if (m_object->internal) {
6984 if (VM_PAGE_Q_THROTTLED(iq)) {
6985 tq = iq;
6986 }
6987 } else if (VM_PAGE_Q_THROTTLED(eq)) {
6988 tq = eq;
6989 }
6990
6991 if (tq != NULL) {
6992 wait_result_t wait_result;
6993 int wait_count = 5;
6994
6995 if (l_object != NULL) {
6996 vm_object_unlock(l_object);
6997 l_object = NULL;
6998 }
6999
7000 while (retval == 0) {
7001 tq->pgo_throttled = TRUE;
7002
7003 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
7004
7005 vm_page_unlock_queues();
7006
7007 wait_result = thread_block(THREAD_CONTINUE_NULL);
7008
7009 vm_page_lock_queues();
7010
7011 if (wait_result != THREAD_TIMED_OUT) {
7012 break;
7013 }
7014 if (!VM_PAGE_Q_THROTTLED(tq)) {
7015 break;
7016 }
7017
7018 if (hibernate_should_abort()) {
7019 retval = 1;
7020 }
7021
7022 if (--wait_count == 0) {
7023 hibernate_stats.hibernate_throttle_timeout++;
7024
7025 if (tq == eq) {
7026 hibernate_skip_external = TRUE;
7027 break;
7028 }
7029 retval = 1;
7030 }
7031 }
7032 if (retval) {
7033 break;
7034 }
7035
7036 hibernate_stats.hibernate_throttled++;
7037
7038 continue;
7039 }
7040 /*
7041 * we've already factored out pages in the laundry which
7042 * means this page can't be on the pageout queue so it's
7043 * safe to do the vm_page_queues_remove
7044 */
7045 vm_page_queues_remove(m, TRUE);
7046
7047 if (m_object->internal == TRUE) {
7048 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
7049 }
7050
7051 vm_pageout_cluster(m);
7052
7053 hibernate_stats.hibernate_found_dirty++;
7054
7055 goto next_pg;
7056
7057 reenter_pg_on_q:
7058 vm_page_queue_remove(q, m, vmp_pageq);
7059 vm_page_queue_enter(q, m, vmp_pageq);
7060
7061 hibernate_stats.hibernate_reentered_on_q++;
7062 next_pg:
7063 hibernate_stats.hibernate_considered++;
7064
7065 qcount--;
7066 try_failed_count = 0;
7067 }
7068 if (l_object != NULL) {
7069 vm_object_unlock(l_object);
7070 l_object = NULL;
7071 }
7072
7073 vm_page_unlock_queues();
7074
7075 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
7076
7077 return retval;
7078 }
7079
7080
7081 static int
7082 hibernate_flush_dirty_pages(int pass)
7083 {
7084 struct vm_speculative_age_q *aq;
7085 uint32_t i;
7086
7087 if (vm_page_local_q) {
7088 for (i = 0; i < vm_page_local_q_count; i++) {
7089 vm_page_reactivate_local(i, TRUE, FALSE);
7090 }
7091 }
7092
7093 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7094 int qcount;
7095 vm_page_t m;
7096
7097 aq = &vm_page_queue_speculative[i];
7098
7099 if (vm_page_queue_empty(&aq->age_q)) {
7100 continue;
7101 }
7102 qcount = 0;
7103
7104 vm_page_lockspin_queues();
7105
7106 vm_page_queue_iterate(&aq->age_q, m, vmp_pageq) {
7107 qcount++;
7108 }
7109 vm_page_unlock_queues();
7110
7111 if (qcount) {
7112 if (hibernate_flush_queue(&aq->age_q, qcount)) {
7113 return 1;
7114 }
7115 }
7116 }
7117 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count)) {
7118 return 1;
7119 }
7120 /* XXX FBDP TODO: flush secluded queue */
7121 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count)) {
7122 return 1;
7123 }
7124 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count)) {
7125 return 1;
7126 }
7127 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
7128 return 1;
7129 }
7130
7131 if (pass == 1) {
7132 vm_compressor_record_warmup_start();
7133 }
7134
7135 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
7136 if (pass == 1) {
7137 vm_compressor_record_warmup_end();
7138 }
7139 return 1;
7140 }
7141 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
7142 if (pass == 1) {
7143 vm_compressor_record_warmup_end();
7144 }
7145 return 1;
7146 }
7147 if (pass == 1) {
7148 vm_compressor_record_warmup_end();
7149 }
7150
7151 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external)) {
7152 return 1;
7153 }
7154
7155 return 0;
7156 }
7157
7158
7159 void
7160 hibernate_reset_stats()
7161 {
7162 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
7163 }
7164
7165
7166 int
7167 hibernate_flush_memory()
7168 {
7169 int retval;
7170
7171 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
7172
7173 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
7174
7175 hibernate_cleaning_in_progress = TRUE;
7176 hibernate_skip_external = FALSE;
7177
7178 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
7179 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
7180
7181 vm_compressor_flush();
7182
7183 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
7184
7185 if (consider_buffer_cache_collect != NULL) {
7186 unsigned int orig_wire_count;
7187
7188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
7189 orig_wire_count = vm_page_wire_count;
7190
7191 (void)(*consider_buffer_cache_collect)(1);
7192 consider_zone_gc(FALSE);
7193
7194 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
7195
7196 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
7197 }
7198 }
7199 hibernate_cleaning_in_progress = FALSE;
7200
7201 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
7202
7203 if (retval) {
7204 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
7205 }
7206
7207
7208 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
7209 hibernate_stats.hibernate_considered,
7210 hibernate_stats.hibernate_reentered_on_q,
7211 hibernate_stats.hibernate_found_dirty);
7212 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
7213 hibernate_stats.hibernate_skipped_cleaning,
7214 hibernate_stats.hibernate_skipped_transient,
7215 hibernate_stats.hibernate_skipped_precious,
7216 hibernate_stats.hibernate_skipped_external,
7217 hibernate_stats.hibernate_queue_nolock);
7218 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
7219 hibernate_stats.hibernate_queue_paused,
7220 hibernate_stats.hibernate_throttled,
7221 hibernate_stats.hibernate_throttle_timeout,
7222 hibernate_stats.hibernate_drained,
7223 hibernate_stats.hibernate_drain_timeout);
7224
7225 return retval;
7226 }
7227
7228
7229 static void
7230 hibernate_page_list_zero(hibernate_page_list_t *list)
7231 {
7232 uint32_t bank;
7233 hibernate_bitmap_t * bitmap;
7234
7235 bitmap = &list->bank_bitmap[0];
7236 for (bank = 0; bank < list->bank_count; bank++) {
7237 uint32_t last_bit;
7238
7239 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
7240 // set out-of-bound bits at end of bitmap.
7241 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
7242 if (last_bit) {
7243 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
7244 }
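/*
 * Worked example (illustrative numbers): a bank spanning 70 pages has
 * last_bit = 70 & 31 = 6, i.e. only 6 page bits are meaningful in the
 * final word.  0xFFFFFFFF >> 6 == 0x03FFFFFF sets the remaining 26
 * low-order bits, marking the slots past the end of the bank as not
 * needing to be saved.
 */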
7245
7246 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
7247 }
7248 }
7249
7250 void
7251 hibernate_free_gobble_pages(void)
7252 {
7253 vm_page_t m, next;
7254 uint32_t count = 0;
7255
7256 m = (vm_page_t) hibernate_gobble_queue;
7257 while (m) {
7258 next = m->vmp_snext;
7259 vm_page_free(m);
7260 count++;
7261 m = next;
7262 }
7263 hibernate_gobble_queue = VM_PAGE_NULL;
7264
7265 if (count) {
7266 HIBLOG("Freed %d pages\n", count);
7267 }
7268 }
7269
7270 static boolean_t
7271 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
7272 {
7273 vm_object_t object = NULL;
7274 int refmod_state;
7275 boolean_t discard = FALSE;
7276
7277 do {
7278 if (m->vmp_private) {
7279 panic("hibernate_consider_discard: private");
7280 }
7281
7282 object = VM_PAGE_OBJECT(m);
7283
7284 if (!vm_object_lock_try(object)) {
7285 object = NULL;
7286 if (!preflight) {
7287 hibernate_stats.cd_lock_failed++;
7288 }
7289 break;
7290 }
7291 if (VM_PAGE_WIRED(m)) {
7292 if (!preflight) {
7293 hibernate_stats.cd_found_wired++;
7294 }
7295 break;
7296 }
7297 if (m->vmp_precious) {
7298 if (!preflight) {
7299 hibernate_stats.cd_found_precious++;
7300 }
7301 break;
7302 }
7303 if (m->vmp_busy || !object->alive) {
7304 /*
7305 * Somebody is playing with this page.
7306 */
7307 if (!preflight) {
7308 hibernate_stats.cd_found_busy++;
7309 }
7310 break;
7311 }
7312 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
7313 /*
7314 * If it's unusual in any way, ignore it
7315 */
7316 if (!preflight) {
7317 hibernate_stats.cd_found_unusual++;
7318 }
7319 break;
7320 }
7321 if (m->vmp_cleaning) {
7322 if (!preflight) {
7323 hibernate_stats.cd_found_cleaning++;
7324 }
7325 break;
7326 }
7327 if (m->vmp_laundry) {
7328 if (!preflight) {
7329 hibernate_stats.cd_found_laundry++;
7330 }
7331 break;
7332 }
7333 if (!m->vmp_dirty) {
7334 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
7335
7336 if (refmod_state & VM_MEM_REFERENCED) {
7337 m->vmp_reference = TRUE;
7338 }
7339 if (refmod_state & VM_MEM_MODIFIED) {
7340 SET_PAGE_DIRTY(m, FALSE);
7341 }
7342 }
7343
7344 /*
7345 * If it's clean or purgeable we can discard the page on wakeup.
7346 */
7347 discard = (!m->vmp_dirty)
7348 || (VM_PURGABLE_VOLATILE == object->purgable)
7349 || (VM_PURGABLE_EMPTY == object->purgable);
7350
7351
7352 if (discard == FALSE) {
7353 if (!preflight) {
7354 hibernate_stats.cd_found_dirty++;
7355 }
7356 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
7357 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
7358 if (!preflight) {
7359 hibernate_stats.cd_found_xpmapped++;
7360 }
7361 discard = FALSE;
7362 } else {
7363 if (!preflight) {
7364 hibernate_stats.cd_skipped_xpmapped++;
7365 }
7366 }
7367 }
7368 } while (FALSE);
7369
7370 if (object) {
7371 vm_object_unlock(object);
7372 }
7373
7374 return discard;
7375 }
7376
7377
7378 static void
7379 hibernate_discard_page(vm_page_t m)
7380 {
7381 vm_object_t m_object;
7382
7383 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
7384 /*
7385 * If it's unusual in any way, ignore it
7386 */
7387 return;
7388 }
7389
7390 m_object = VM_PAGE_OBJECT(m);
7391
7392 #if MACH_ASSERT || DEBUG
7393 if (!vm_object_lock_try(m_object)) {
7394 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
7395 }
7396 #else
7397 /* No need to lock page queue for token delete, hibernate_vm_unlock()
7398 * makes sure these locks are uncontended before sleep */
7399 #endif /* MACH_ASSERT || DEBUG */
7400
7401 if (m->vmp_pmapped == TRUE) {
7402 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7403 }
7404
7405 if (m->vmp_laundry) {
7406 panic("hibernate_discard_page(%p) laundry", m);
7407 }
7408 if (m->vmp_private) {
7409 panic("hibernate_discard_page(%p) private", m);
7410 }
7411 if (m->vmp_fictitious) {
7412 panic("hibernate_discard_page(%p) fictitious", m);
7413 }
7414
7415 if (VM_PURGABLE_VOLATILE == m_object->purgable) {
7416 /* object should be on a queue */
7417 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
7418 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
7419 assert(old_queue);
7420 if (m_object->purgeable_when_ripe) {
7421 vm_purgeable_token_delete_first(old_queue);
7422 }
7423 vm_object_lock_assert_exclusive(m_object);
7424 m_object->purgable = VM_PURGABLE_EMPTY;
7425
7426 /*
7427 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
7428 * accounted in the "volatile" ledger, so no change here.
7429 * We have to update vm_page_purgeable_count, though, since we're
7430 * effectively purging this object.
7431 */
7432 unsigned int delta;
7433 assert(m_object->resident_page_count >= m_object->wired_page_count);
7434 delta = (m_object->resident_page_count - m_object->wired_page_count);
7435 assert(vm_page_purgeable_count >= delta);
7436 assert(delta > 0);
7437 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
7438 }
7439
7440 vm_page_free(m);
7441
7442 #if MACH_ASSERT || DEBUG
7443 vm_object_unlock(m_object);
7444 #endif /* MACH_ASSERT || DEBUG */
7445 }
7446
7447 /*
7448 * Grab locks for hibernate_page_list_setall()
7449 */
7450 void
7451 hibernate_vm_lock_queues(void)
7452 {
7453 vm_object_lock(compressor_object);
7454 vm_page_lock_queues();
7455 lck_mtx_lock(&vm_page_queue_free_lock);
7456 lck_mtx_lock(&vm_purgeable_queue_lock);
7457
7458 if (vm_page_local_q) {
7459 uint32_t i;
7460 for (i = 0; i < vm_page_local_q_count; i++) {
7461 struct vpl *lq;
7462 lq = &vm_page_local_q[i].vpl_un.vpl;
7463 VPL_LOCK(&lq->vpl_lock);
7464 }
7465 }
7466 }
7467
7468 void
7469 hibernate_vm_unlock_queues(void)
7470 {
7471 if (vm_page_local_q) {
7472 uint32_t i;
7473 for (i = 0; i < vm_page_local_q_count; i++) {
7474 struct vpl *lq;
7475 lq = &vm_page_local_q[i].vpl_un.vpl;
7476 VPL_UNLOCK(&lq->vpl_lock);
7477 }
7478 }
7479 lck_mtx_unlock(&vm_purgeable_queue_lock);
7480 lck_mtx_unlock(&vm_page_queue_free_lock);
7481 vm_page_unlock_queues();
7482 vm_object_unlock(compressor_object);
7483 }
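/*
 * Note that the unlock order here is the exact reverse of the order in
 * which hibernate_vm_lock_queues() above takes these locks.
 */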
7484
7485 /*
7486 * Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
7487 * pages known to VM to not need saving are subtracted.
7488 * Wired pages to be saved are present in page_list_wired, pageable in page_list.
7489 */
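/*
 * Illustrative sketch of the convention (ppn is a hypothetical physical
 * page number): calling
 *
 *	hibernate_page_bitset(page_list, TRUE, ppn);
 *
 * sets ppn's bit, marking it as a page the image does not need to contain;
 * any bit still zero when the scan below finishes denotes a page to save.
 * page_list_wired uses the same encoding for the wired subset.
 */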
7490
7491 void
7492 hibernate_page_list_setall(hibernate_page_list_t * page_list,
7493 hibernate_page_list_t * page_list_wired,
7494 hibernate_page_list_t * page_list_pal,
7495 boolean_t preflight,
7496 boolean_t will_discard,
7497 uint32_t * pagesOut)
7498 {
7499 uint64_t start, end, nsec;
7500 vm_page_t m;
7501 vm_page_t next;
7502 uint32_t pages = page_list->page_count;
7503 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
7504 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
7505 uint32_t count_wire = pages;
7506 uint32_t count_discard_active = 0;
7507 uint32_t count_discard_inactive = 0;
7508 uint32_t count_discard_cleaned = 0;
7509 uint32_t count_discard_purgeable = 0;
7510 uint32_t count_discard_speculative = 0;
7511 uint32_t count_discard_vm_struct_pages = 0;
7512 uint32_t i;
7513 uint32_t bank;
7514 hibernate_bitmap_t * bitmap;
7515 hibernate_bitmap_t * bitmap_wired;
7516 boolean_t discard_all;
7517 boolean_t discard;
7518
7519 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
7520
7521 if (preflight) {
7522 page_list = NULL;
7523 page_list_wired = NULL;
7524 page_list_pal = NULL;
7525 discard_all = FALSE;
7526 } else {
7527 discard_all = will_discard;
7528 }
7529
7530 #if MACH_ASSERT || DEBUG
7531 if (!preflight) {
7532 assert(hibernate_vm_locks_are_safe());
7533 vm_page_lock_queues();
7534 if (vm_page_local_q) {
7535 for (i = 0; i < vm_page_local_q_count; i++) {
7536 struct vpl *lq;
7537 lq = &vm_page_local_q[i].vpl_un.vpl;
7538 VPL_LOCK(&lq->vpl_lock);
7539 }
7540 }
7541 }
7542 #endif /* MACH_ASSERT || DEBUG */
7543
7544
7545 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
7546
7547 clock_get_uptime(&start);
7548
7549 if (!preflight) {
7550 hibernate_page_list_zero(page_list);
7551 hibernate_page_list_zero(page_list_wired);
7552 hibernate_page_list_zero(page_list_pal);
7553
7554 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7555 hibernate_stats.cd_pages = pages;
7556 }
7557
7558 if (vm_page_local_q) {
7559 for (i = 0; i < vm_page_local_q_count; i++) {
7560 vm_page_reactivate_local(i, TRUE, !preflight);
7561 }
7562 }
7563
7564 if (preflight) {
7565 vm_object_lock(compressor_object);
7566 vm_page_lock_queues();
7567 lck_mtx_lock(&vm_page_queue_free_lock);
7568 }
7569
7570 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7571
7572 hibernation_vmqueues_inspection = TRUE;
7573
7574 m = (vm_page_t) hibernate_gobble_queue;
7575 while (m) {
7576 pages--;
7577 count_wire--;
7578 if (!preflight) {
7579 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7580 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7581 }
7582 m = m->vmp_snext;
7583 }
7584
7585 if (!preflight) {
7586 for (i = 0; i < real_ncpus; i++) {
7587 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor) {
7588 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->vmp_snext) {
7589 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
7590
7591 pages--;
7592 count_wire--;
7593 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7594 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7595
7596 hibernate_stats.cd_local_free++;
7597 hibernate_stats.cd_total_free++;
7598 }
7599 }
7600 }
7601 }
7602
7603 for (i = 0; i < vm_colors; i++) {
7604 vm_page_queue_iterate(&vm_page_queue_free[i].qhead, m, vmp_pageq) {
7605 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
7606
7607 pages--;
7608 count_wire--;
7609 if (!preflight) {
7610 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7611 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7612
7613 hibernate_stats.cd_total_free++;
7614 }
7615 }
7616 }
7617
7618 vm_page_queue_iterate(&vm_lopage_queue_free, m, vmp_pageq) {
7619 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
7620
7621 pages--;
7622 count_wire--;
7623 if (!preflight) {
7624 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7625 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7626
7627 hibernate_stats.cd_total_free++;
7628 }
7629 }
7630
7631 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7632 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m)) {
7633 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
7634
7635 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7636 discard = FALSE;
7637 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7638 && hibernate_consider_discard(m, preflight)) {
7639 if (!preflight) {
7640 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7641 }
7642 count_discard_inactive++;
7643 discard = discard_all;
7644 } else {
7645 count_throttled++;
7646 }
7647 count_wire--;
7648 if (!preflight) {
7649 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7650 }
7651
7652 if (discard) {
7653 hibernate_discard_page(m);
7654 }
7655 m = next;
7656 }
7657
7658 m = (vm_page_t)vm_page_queue_first(&vm_page_queue_anonymous);
7659 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
7660 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7661
7662 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7663 discard = FALSE;
7664 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7665 hibernate_consider_discard(m, preflight)) {
7666 if (!preflight) {
7667 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7668 }
7669 if (m->vmp_dirty) {
7670 count_discard_purgeable++;
7671 } else {
7672 count_discard_inactive++;
7673 }
7674 discard = discard_all;
7675 } else {
7676 count_anonymous++;
7677 }
7678 count_wire--;
7679 if (!preflight) {
7680 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7681 }
7682 if (discard) {
7683 hibernate_discard_page(m);
7684 }
7685 m = next;
7686 }
7687
7688 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7689 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
7690 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7691
7692 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7693 discard = FALSE;
7694 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7695 hibernate_consider_discard(m, preflight)) {
7696 if (!preflight) {
7697 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7698 }
7699 if (m->vmp_dirty) {
7700 count_discard_purgeable++;
7701 } else {
7702 count_discard_cleaned++;
7703 }
7704 discard = discard_all;
7705 } else {
7706 count_cleaned++;
7707 }
7708 count_wire--;
7709 if (!preflight) {
7710 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7711 }
7712 if (discard) {
7713 hibernate_discard_page(m);
7714 }
7715 m = next;
7716 }
7717
7718 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7719 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
7720 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7721
7722 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7723 discard = FALSE;
7724 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) &&
7725 hibernate_consider_discard(m, preflight)) {
7726 if (!preflight) {
7727 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7728 }
7729 if (m->vmp_dirty) {
7730 count_discard_purgeable++;
7731 } else {
7732 count_discard_active++;
7733 }
7734 discard = discard_all;
7735 } else {
7736 count_active++;
7737 }
7738 count_wire--;
7739 if (!preflight) {
7740 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7741 }
7742 if (discard) {
7743 hibernate_discard_page(m);
7744 }
7745 m = next;
7746 }
7747
7748 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7749 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
7750 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7751
7752 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7753 discard = FALSE;
7754 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7755 hibernate_consider_discard(m, preflight)) {
7756 if (!preflight) {
7757 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7758 }
7759 if (m->vmp_dirty) {
7760 count_discard_purgeable++;
7761 } else {
7762 count_discard_inactive++;
7763 }
7764 discard = discard_all;
7765 } else {
7766 count_inactive++;
7767 }
7768 count_wire--;
7769 if (!preflight) {
7770 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7771 }
7772 if (discard) {
7773 hibernate_discard_page(m);
7774 }
7775 m = next;
7776 }
7777 /* XXX FBDP TODO: secluded queue */
7778
7779 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7780 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7781 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
7782 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
7783 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7784 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
7785
7786 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7787 discard = FALSE;
7788 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7789 hibernate_consider_discard(m, preflight)) {
7790 if (!preflight) {
7791 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7792 }
7793 count_discard_speculative++;
7794 discard = discard_all;
7795 } else {
7796 count_speculative++;
7797 }
7798 count_wire--;
7799 if (!preflight) {
7800 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7801 }
7802 if (discard) {
7803 hibernate_discard_page(m);
7804 }
7805 m = next;
7806 }
7807 }
7808
7809 vm_page_queue_iterate(&compressor_object->memq, m, vmp_listq) {
7810 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7811
7812 count_compressor++;
7813 count_wire--;
7814 if (!preflight) {
7815 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7816 }
7817 }
7818
7819 if (preflight == FALSE && discard_all == TRUE) {
7820 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7821
7822 HIBLOG("hibernate_teardown started\n");
7823 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7824 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7825
7826 pages -= count_discard_vm_struct_pages;
7827 count_wire -= count_discard_vm_struct_pages;
7828
7829 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7830
7831 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7832 }
7833
7834 if (!preflight) {
7835 // pull wired from hibernate_bitmap
7836 bitmap = &page_list->bank_bitmap[0];
7837 bitmap_wired = &page_list_wired->bank_bitmap[0];
7838 for (bank = 0; bank < page_list->bank_count; bank++) {
7839 for (i = 0; i < bitmap->bitmapwords; i++) {
7840 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7841 }
7842 bitmap = (hibernate_bitmap_t *)&bitmap->bitmap[bitmap->bitmapwords];
7843 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7844 }
7845 }
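/*
 * In effect, the loop above removes the wired pages from page_list: ORing
 * in the complement of the wired bitmap marks as "no need to save" every
 * page that the wired bitmap still lists as to-be-saved, leaving page_list
 * to describe only the pageable pages and page_list_wired the wired ones.
 */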
7846
7847 // machine dependent adjustments
7848 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7849
7850 if (!preflight) {
7851 hibernate_stats.cd_count_wire = count_wire;
7852 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7853 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7854 }
7855
7856 clock_get_uptime(&end);
7857 absolutetime_to_nanoseconds(end - start, &nsec);
7858 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7859
7860 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7861 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7862 discard_all ? "did" : "could",
7863 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7864
7865 if (hibernate_stats.cd_skipped_xpmapped) {
7866 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7867 }
7868
7869 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7870
7871 if (preflight && will_discard) {
7872 *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7873 }
7874
7875 hibernation_vmqueues_inspection = FALSE;
7876
7877 #if MACH_ASSERT || DEBUG
7878 if (!preflight) {
7879 if (vm_page_local_q) {
7880 for (i = 0; i < vm_page_local_q_count; i++) {
7881 struct vpl *lq;
7882 lq = &vm_page_local_q[i].vpl_un.vpl;
7883 VPL_UNLOCK(&lq->vpl_lock);
7884 }
7885 }
7886 vm_page_unlock_queues();
7887 }
7888 #endif /* MACH_ASSERT || DEBUG */
7889
7890 if (preflight) {
7891 lck_mtx_unlock(&vm_page_queue_free_lock);
7892 vm_page_unlock_queues();
7893 vm_object_unlock(compressor_object);
7894 }
7895
7896 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7897 }
7898
7899 void
7900 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7901 {
7902 uint64_t start, end, nsec;
7903 vm_page_t m;
7904 vm_page_t next;
7905 uint32_t i;
7906 uint32_t count_discard_active = 0;
7907 uint32_t count_discard_inactive = 0;
7908 uint32_t count_discard_purgeable = 0;
7909 uint32_t count_discard_cleaned = 0;
7910 uint32_t count_discard_speculative = 0;
7911
7912
7913 #if MACH_ASSERT || DEBUG
7914 vm_page_lock_queues();
7915 if (vm_page_local_q) {
7916 for (i = 0; i < vm_page_local_q_count; i++) {
7917 struct vpl *lq;
7918 lq = &vm_page_local_q[i].vpl_un.vpl;
7919 VPL_LOCK(&lq->vpl_lock);
7920 }
7921 }
7922 #endif /* MACH_ASSERT || DEBUG */
7923
7924 clock_get_uptime(&start);
7925
7926 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7927 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
7928 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7929
7930 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7931 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7932 if (m->vmp_dirty) {
7933 count_discard_purgeable++;
7934 } else {
7935 count_discard_inactive++;
7936 }
7937 hibernate_discard_page(m);
7938 }
7939 m = next;
7940 }
7941
7942 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7943 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7944 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
7945 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7946
7947 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7948 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7949 count_discard_speculative++;
7950 hibernate_discard_page(m);
7951 }
7952 m = next;
7953 }
7954 }
7955
7956 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7957 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
7958 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7959
7960 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7961 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7962 if (m->vmp_dirty) {
7963 count_discard_purgeable++;
7964 } else {
7965 count_discard_inactive++;
7966 }
7967 hibernate_discard_page(m);
7968 }
7969 m = next;
7970 }
7971 /* XXX FBDP TODO: secluded queue */
7972
7973 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7974 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
7975 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7976
7977 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7978 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7979 if (m->vmp_dirty) {
7980 count_discard_purgeable++;
7981 } else {
7982 count_discard_active++;
7983 }
7984 hibernate_discard_page(m);
7985 }
7986 m = next;
7987 }
7988
7989 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7990 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
7991 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7992
7993 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7994 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7995 if (m->vmp_dirty) {
7996 count_discard_purgeable++;
7997 } else {
7998 count_discard_cleaned++;
7999 }
8000 hibernate_discard_page(m);
8001 }
8002 m = next;
8003 }
8004
8005 #if MACH_ASSERT || DEBUG
8006 if (vm_page_local_q) {
8007 for (i = 0; i < vm_page_local_q_count; i++) {
8008 struct vpl *lq;
8009 lq = &vm_page_local_q[i].vpl_un.vpl;
8010 VPL_UNLOCK(&lq->vpl_lock);
8011 }
8012 }
8013 vm_page_unlock_queues();
8014 #endif /* MACH_ASSERT || DEBUG */
8015
8016 clock_get_uptime(&end);
8017 absolutetime_to_nanoseconds(end - start, &nsec);
8018 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
8019 nsec / 1000000ULL,
8020 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
8021 }
8022
8023 boolean_t hibernate_paddr_map_inited = FALSE;
8024 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
8025 vm_page_t hibernate_rebuild_hash_list = NULL;
8026
8027 unsigned int hibernate_teardown_found_tabled_pages = 0;
8028 unsigned int hibernate_teardown_found_created_pages = 0;
8029 unsigned int hibernate_teardown_found_free_pages = 0;
8030 unsigned int hibernate_teardown_vm_page_free_count;
8031
8032
8033 struct ppnum_mapping {
8034 struct ppnum_mapping *ppnm_next;
8035 ppnum_t ppnm_base_paddr;
8036 unsigned int ppnm_sindx;
8037 unsigned int ppnm_eindx;
8038 };
8039
8040 struct ppnum_mapping *ppnm_head;
8041 struct ppnum_mapping *ppnm_last_found = NULL;
8042
8043
8044 void
8045 hibernate_create_paddr_map()
8046 {
8047 unsigned int i;
8048 ppnum_t next_ppnum_in_run = 0;
8049 struct ppnum_mapping *ppnm = NULL;
8050
8051 if (hibernate_paddr_map_inited == FALSE) {
8052 for (i = 0; i < vm_pages_count; i++) {
8053 if (ppnm) {
8054 ppnm->ppnm_eindx = i;
8055 }
8056
8057 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
8058 ppnm = kalloc(sizeof(struct ppnum_mapping));
8059
8060 ppnm->ppnm_next = ppnm_head;
8061 ppnm_head = ppnm;
8062
8063 ppnm->ppnm_sindx = i;
8064 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
8065 }
8066 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
8067 }
8068 ppnm->ppnm_eindx++;
8069
8070 hibernate_paddr_map_inited = TRUE;
8071 }
8072 }
8073
8074 ppnum_t
8075 hibernate_lookup_paddr(unsigned int indx)
8076 {
8077 struct ppnum_mapping *ppnm = NULL;
8078
8079 ppnm = ppnm_last_found;
8080
8081 if (ppnm) {
8082 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
8083 goto done;
8084 }
8085 }
8086 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
8087 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
8088 ppnm_last_found = ppnm;
8089 break;
8090 }
8091 }
8092 if (ppnm == NULL) {
8093 panic("hibernate_lookup_paddr of %d failed\n", indx);
8094 }
8095 done:
8096 return ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx);
8097 }
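/*
 * Worked example (hypothetical numbers): if vm_pages[100..149] describe 50
 * physically contiguous pages starting at ppnum 0x2000, the run is recorded
 * as { ppnm_sindx = 100, ppnm_eindx = 150, ppnm_base_paddr = 0x2000 }, and
 * hibernate_lookup_paddr(120) returns 0x2000 + (120 - 100) = 0x2014.
 */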
8098
8099
8100 uint32_t
8101 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8102 {
8103 addr64_t saddr_aligned;
8104 addr64_t eaddr_aligned;
8105 addr64_t addr;
8106 ppnum_t paddr;
8107 unsigned int mark_as_unneeded_pages = 0;
8108
8109 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
8110 eaddr_aligned = eaddr & ~PAGE_MASK_64;
8111
8112 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
8113 paddr = pmap_find_phys(kernel_pmap, addr);
8114
8115 assert(paddr);
8116
8117 hibernate_page_bitset(page_list, TRUE, paddr);
8118 hibernate_page_bitset(page_list_wired, TRUE, paddr);
8119
8120 mark_as_unneeded_pages++;
8121 }
8122 return mark_as_unneeded_pages;
8123 }
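/*
 * Note that saddr is rounded up and eaddr rounded down to page boundaries,
 * so only pages that lie entirely within [saddr, eaddr) are marked as not
 * needing to be preserved in the image.
 */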
8124
8125
8126 void
8127 hibernate_hash_insert_page(vm_page_t mem)
8128 {
8129 vm_page_bucket_t *bucket;
8130 int hash_id;
8131 vm_object_t m_object;
8132
8133 m_object = VM_PAGE_OBJECT(mem);
8134
8135 assert(mem->vmp_hashed);
8136 assert(m_object);
8137 assert(mem->vmp_offset != (vm_object_offset_t) -1);
8138
8139 /*
8140 * Insert it into the object/offset hash table
8141 */
8142 hash_id = vm_page_hash(m_object, mem->vmp_offset);
8143 bucket = &vm_page_buckets[hash_id];
8144
8145 mem->vmp_next_m = bucket->page_list;
8146 bucket->page_list = VM_PAGE_PACK_PTR(mem);
8147 }
8148
8149
8150 void
8151 hibernate_free_range(int sindx, int eindx)
8152 {
8153 vm_page_t mem;
8154 unsigned int color;
8155
8156 while (sindx < eindx) {
8157 mem = &vm_pages[sindx];
8158
8159 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
8160
8161 mem->vmp_lopage = FALSE;
8162 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
8163
8164 color = VM_PAGE_GET_COLOR(mem);
8165 #if defined(__x86_64__)
8166 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
8167 #else
8168 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
8169 #endif
8170 vm_page_free_count++;
8171
8172 sindx++;
8173 }
8174 }
8175
8176
8177 extern void hibernate_rebuild_pmap_structs(void);
8178
8179 void
8180 hibernate_rebuild_vm_structs(void)
8181 {
8182 int i, cindx, sindx, eindx;
8183 vm_page_t mem, tmem, mem_next;
8184 AbsoluteTime startTime, endTime;
8185 uint64_t nsec;
8186
8187 if (hibernate_rebuild_needed == FALSE) {
8188 return;
8189 }
8190
8191 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
8192 HIBLOG("hibernate_rebuild started\n");
8193
8194 clock_get_uptime(&startTime);
8195
8196 hibernate_rebuild_pmap_structs();
8197
8198 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
8199 eindx = vm_pages_count;
8200
8201 /*
8202 * Mark all the vm_pages[] that have not been initialized yet as being
8203 * transient. This is needed to ensure that the buddy page search is correct.
8204 * Without this, random data in these vm_pages[] can trip up the buddy search.
8205 */
8206 for (i = hibernate_teardown_last_valid_compact_indx + 1; i < eindx; ++i) {
8207 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
8208 }
8209
8210 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
8211 mem = &vm_pages[cindx];
8212 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
8213 /*
8214 * hibernate_teardown_vm_structs recorded, in this page's packed
8215 * "next" pointer (vmp_next_m), the location where it must be restored.
8216 */
8217 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8218 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
8219
8220 sindx = (int)(tmem - &vm_pages[0]);
8221
8222 if (mem != tmem) {
8223 /*
8224 * this vm_page_t was moved by hibernate_teardown_vm_structs,
8225 * so move it back to its real location
8226 */
8227 *tmem = *mem;
8228 mem = tmem;
8229 }
8230 if (mem->vmp_hashed) {
8231 hibernate_hash_insert_page(mem);
8232 }
8233 /*
8234 * the 'hole' between this vm_page_t and the previous
8235 * vm_page_t we moved needs to be initialized as
8236 * a range of free vm_page_t's
8237 */
8238 hibernate_free_range(sindx + 1, eindx);
8239
8240 eindx = sindx;
8241 }
8242 if (sindx) {
8243 hibernate_free_range(0, sindx);
8244 }
8245
8246 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
8247
8248 /*
8249 * process the list of vm_page_t's that were entered in the hash,
8250 * but were not located in the vm_pages array... these are
8251 * vm_page_t's that were created on the fly (i.e. fictitious)
8252 */
8253 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
8254 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8255
8256 mem->vmp_next_m = 0;
8257 hibernate_hash_insert_page(mem);
8258 }
8259 hibernate_rebuild_hash_list = NULL;
8260
8261 clock_get_uptime(&endTime);
8262 SUB_ABSOLUTETIME(&endTime, &startTime);
8263 absolutetime_to_nanoseconds(endTime, &nsec);
8264
8265 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
8266
8267 hibernate_rebuild_needed = FALSE;
8268
8269 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
8270 }
8271
8272
8273 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
8274
8275 uint32_t
8276 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8277 {
8278 unsigned int i;
8279 unsigned int compact_target_indx;
8280 vm_page_t mem, mem_next;
8281 vm_page_bucket_t *bucket;
8282 unsigned int mark_as_unneeded_pages = 0;
8283 unsigned int unneeded_vm_page_bucket_pages = 0;
8284 unsigned int unneeded_vm_pages_pages = 0;
8285 unsigned int unneeded_pmap_pages = 0;
8286 addr64_t start_of_unneeded = 0;
8287 addr64_t end_of_unneeded = 0;
8288
8289
8290 if (hibernate_should_abort()) {
8291 return 0;
8292 }
8293
8294 hibernate_rebuild_needed = TRUE;
8295
8296 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
8297 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
8298 vm_page_cleaned_count, compressor_object->resident_page_count);
8299
8300 for (i = 0; i < vm_page_bucket_count; i++) {
8301 bucket = &vm_page_buckets[i];
8302
8303 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
8304 assert(mem->vmp_hashed);
8305
8306 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8307
8308 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
8309 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
8310 hibernate_rebuild_hash_list = mem;
8311 }
8312 }
8313 }
8314 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
8315 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
8316
8317 hibernate_teardown_vm_page_free_count = vm_page_free_count;
8318
8319 compact_target_indx = 0;
8320
8321 for (i = 0; i < vm_pages_count; i++) {
8322 mem = &vm_pages[i];
8323
8324 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
8325 unsigned int color;
8326
8327 assert(mem->vmp_busy);
8328 assert(!mem->vmp_lopage);
8329
8330 color = VM_PAGE_GET_COLOR(mem);
8331
8332 vm_page_queue_remove(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
8333
8334 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8335
8336 vm_page_free_count--;
8337
8338 hibernate_teardown_found_free_pages++;
8339
8340 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q) {
8341 compact_target_indx = i;
8342 }
8343 } else {
8344 /*
8345 * record this vm_page_t's original location
8346 * we need this even if it doesn't get moved
8347 * as an indicator to the rebuild function that
8348 * we don't have to move it
8349 */
8350 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
8351
8352 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
8353 /*
8354 * we've got a hole to fill, so
8355 * move this vm_page_t to its new home
8356 */
8357 vm_pages[compact_target_indx] = *mem;
8358 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
8359
8360 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
8361 compact_target_indx++;
8362 } else {
8363 hibernate_teardown_last_valid_compact_indx = i;
8364 }
8365 }
8366 }
8367 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx + 1],
8368 (addr64_t)&vm_pages[vm_pages_count - 1], page_list, page_list_wired);
8369 mark_as_unneeded_pages += unneeded_vm_pages_pages;
8370
8371 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
8372
8373 if (start_of_unneeded) {
8374 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
8375 mark_as_unneeded_pages += unneeded_pmap_pages;
8376 }
8377 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
8378
8379 return mark_as_unneeded_pages;
8380 }
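/*
 * Rough sketch of the teardown/rebuild scheme: the loop above compacts the
 * vm_pages[] array by copying each in-use entry down into the lowest free
 * slot (compact_target_indx), so that everything past
 * hibernate_teardown_last_valid_compact_indx can be marked unneeded and
 * left out of the image.  Every surviving entry keeps its original address
 * packed in vmp_next_m, which is what lets hibernate_rebuild_vm_structs()
 * copy it back after wake and re-create the intervening free ranges.
 */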
8381
8382
8383 #endif /* HIBERNATION */
8384
8385 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8386
8387 #include <mach_vm_debug.h>
8388 #if MACH_VM_DEBUG
8389
8390 #include <mach_debug/hash_info.h>
8391 #include <vm/vm_debug.h>
8392
8393 /*
8394 * Routine: vm_page_info
8395 * Purpose:
8396 * Return information about the global VP table.
8397 * Fills the buffer with as much information as possible
8398 * and returns the desired size of the buffer.
8399 * Conditions:
8400 * Nothing locked. The caller should provide
8401 * possibly-pageable memory.
8402 */
8403
8404 unsigned int
8405 vm_page_info(
8406 hash_info_bucket_t *info,
8407 unsigned int count)
8408 {
8409 unsigned int i;
8410 lck_spin_t *bucket_lock;
8411
8412 if (vm_page_bucket_count < count) {
8413 count = vm_page_bucket_count;
8414 }
8415
8416 for (i = 0; i < count; i++) {
8417 vm_page_bucket_t *bucket = &vm_page_buckets[i];
8418 unsigned int bucket_count = 0;
8419 vm_page_t m;
8420
8421 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
8422 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
8423
8424 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
8425 m != VM_PAGE_NULL;
8426 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m))) {
8427 bucket_count++;
8428 }
8429
8430 lck_spin_unlock(bucket_lock);
8431
8432 /* don't touch pageable memory while holding locks */
8433 info[i].hib_count = bucket_count;
8434 }
8435
8436 return vm_page_bucket_count;
8437 }
8438 #endif /* MACH_VM_DEBUG */
8439
8440 #if VM_PAGE_BUCKETS_CHECK
8441 void
8442 vm_page_buckets_check(void)
8443 {
8444 unsigned int i;
8445 vm_page_t p;
8446 unsigned int p_hash;
8447 vm_page_bucket_t *bucket;
8448 lck_spin_t *bucket_lock;
8449
8450 if (!vm_page_buckets_check_ready) {
8451 return;
8452 }
8453
8454 #if HIBERNATION
8455 if (hibernate_rebuild_needed ||
8456 hibernate_rebuild_hash_list) {
8457 panic("BUCKET_CHECK: hibernation in progress: "
8458 "rebuild_needed=%d rebuild_hash_list=%p\n",
8459 hibernate_rebuild_needed,
8460 hibernate_rebuild_hash_list);
8461 }
8462 #endif /* HIBERNATION */
8463
8464 #if VM_PAGE_FAKE_BUCKETS
8465 char *cp;
8466 for (cp = (char *) vm_page_fake_buckets_start;
8467 cp < (char *) vm_page_fake_buckets_end;
8468 cp++) {
8469 if (*cp != 0x5a) {
8470 panic("BUCKET_CHECK: corruption at %p in fake buckets "
8471 "[0x%llx:0x%llx]\n",
8472 cp,
8473 (uint64_t) vm_page_fake_buckets_start,
8474 (uint64_t) vm_page_fake_buckets_end);
8475 }
8476 }
8477 #endif /* VM_PAGE_FAKE_BUCKETS */
8478
8479 for (i = 0; i < vm_page_bucket_count; i++) {
8480 vm_object_t p_object;
8481
8482 bucket = &vm_page_buckets[i];
8483 if (!bucket->page_list) {
8484 continue;
8485 }
8486
8487 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
8488 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
8489 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
8490
8491 while (p != VM_PAGE_NULL) {
8492 p_object = VM_PAGE_OBJECT(p);
8493
8494 if (!p->vmp_hashed) {
8495 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
8496 "hash %d in bucket %d at %p "
8497 "is not hashed\n",
8498 p, p_object, p->vmp_offset,
8499 p_hash, i, bucket);
8500 }
8501 p_hash = vm_page_hash(p_object, p->vmp_offset);
8502 if (p_hash != i) {
8503 panic("BUCKET_CHECK: corruption in bucket %d "
8504 "at %p: page %p object %p offset 0x%llx "
8505 "hash %d\n",
8506 i, bucket, p, p_object, p->vmp_offset,
8507 p_hash);
8508 }
8509 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
8510 }
8511 lck_spin_unlock(bucket_lock);
8512 }
8513
8514 // printf("BUCKET_CHECK: checked buckets\n");
8515 }
8516 #endif /* VM_PAGE_BUCKETS_CHECK */
8517
8518 /*
8519 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
8520 * local queues if they exist... it's the only spot in the system where we add pages
8521 * to those queues... once on those queues, those pages can only move to one of the
8522 * global page queues or the free queues... they NEVER move from local q to local q.
8523 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
8524 * the global vm_page_queue_lock at this point... we still need to take the local lock
8525 * in case this operation is being run on a different CPU than the local queue's identity,
8526 * but we don't have to worry about the page moving to a global queue or becoming wired
8527 * while we're grabbing the local lock since those operations would require the global
8528 * vm_page_queue_lock to be held, and we already own it.
8529 *
8530 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
8531 * 'wired' and local are ALWAYS mutually exclusive conditions.
8532 */
8533
8534 #if CONFIG_BACKGROUND_QUEUE
8535 void
8536 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
8537 #else
8538 void
8539 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
8540 #endif
8541 {
8542 boolean_t was_pageable = TRUE;
8543 vm_object_t m_object;
8544
8545 m_object = VM_PAGE_OBJECT(mem);
8546
8547 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8548
8549 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q) {
8550 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8551 #if CONFIG_BACKGROUND_QUEUE
8552 if (remove_from_backgroundq == TRUE) {
8553 vm_page_remove_from_backgroundq(mem);
8554 }
8555 if (mem->vmp_on_backgroundq) {
8556 assert(mem->vmp_backgroundq.next != 0);
8557 assert(mem->vmp_backgroundq.prev != 0);
8558 } else {
8559 assert(mem->vmp_backgroundq.next == 0);
8560 assert(mem->vmp_backgroundq.prev == 0);
8561 }
8562 #endif /* CONFIG_BACKGROUND_QUEUE */
8563 return;
8564 }
8565
8566 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8567 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8568 #if CONFIG_BACKGROUND_QUEUE
8569 assert(mem->vmp_backgroundq.next == 0 &&
8570 mem->vmp_backgroundq.prev == 0 &&
8571 mem->vmp_on_backgroundq == FALSE);
8572 #endif
8573 return;
8574 }
8575 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
8576 /*
8577 * might put these guys on a list for debugging purposes
8578 * if we do, we'll need to remove this assert
8579 */
8580 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8581 #if CONFIG_BACKGROUND_QUEUE
8582 assert(mem->vmp_backgroundq.next == 0 &&
8583 mem->vmp_backgroundq.prev == 0 &&
8584 mem->vmp_on_backgroundq == FALSE);
8585 #endif
8586 return;
8587 }
8588
8589 assert(m_object != compressor_object);
8590 assert(m_object != kernel_object);
8591 assert(m_object != vm_submap_object);
8592 assert(!mem->vmp_fictitious);
8593
8594 switch (mem->vmp_q_state) {
8595 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8596 {
8597 struct vpl *lq;
8598
8599 lq = &vm_page_local_q[mem->vmp_local_id].vpl_un.vpl;
8600 VPL_LOCK(&lq->vpl_lock);
8601 vm_page_queue_remove(&lq->vpl_queue, mem, vmp_pageq);
8602 mem->vmp_local_id = 0;
8603 lq->vpl_count--;
8604 if (m_object->internal) {
8605 lq->vpl_internal_count--;
8606 } else {
8607 lq->vpl_external_count--;
8608 }
8609 VPL_UNLOCK(&lq->vpl_lock);
8610 was_pageable = FALSE;
8611 break;
8612 }
8613 case VM_PAGE_ON_ACTIVE_Q:
8614 {
8615 vm_page_queue_remove(&vm_page_queue_active, mem, vmp_pageq);
8616 vm_page_active_count--;
8617 break;
8618 }
8619
8620 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8621 {
8622 assert(m_object->internal == TRUE);
8623
8624 vm_page_inactive_count--;
8625 vm_page_queue_remove(&vm_page_queue_anonymous, mem, vmp_pageq);
8626 vm_page_anonymous_count--;
8627
8628 vm_purgeable_q_advance_all();
8629 vm_page_balance_inactive(3);
8630 break;
8631 }
8632
8633 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8634 {
8635 assert(m_object->internal == FALSE);
8636
8637 vm_page_inactive_count--;
8638 vm_page_queue_remove(&vm_page_queue_inactive, mem, vmp_pageq);
8639 vm_purgeable_q_advance_all();
8640 vm_page_balance_inactive(3);
8641 break;
8642 }
8643
8644 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8645 {
8646 assert(m_object->internal == FALSE);
8647
8648 vm_page_inactive_count--;
8649 vm_page_queue_remove(&vm_page_queue_cleaned, mem, vmp_pageq);
8650 vm_page_cleaned_count--;
8651 vm_page_balance_inactive(3);
8652 break;
8653 }
8654
8655 case VM_PAGE_ON_THROTTLED_Q:
8656 {
8657 assert(m_object->internal == TRUE);
8658
8659 vm_page_queue_remove(&vm_page_queue_throttled, mem, vmp_pageq);
8660 vm_page_throttled_count--;
8661 was_pageable = FALSE;
8662 break;
8663 }
8664
8665 case VM_PAGE_ON_SPECULATIVE_Q:
8666 {
8667 assert(m_object->internal == FALSE);
8668
8669 vm_page_remque(&mem->vmp_pageq);
8670 vm_page_speculative_count--;
8671 vm_page_balance_inactive(3);
8672 break;
8673 }
8674
8675 #if CONFIG_SECLUDED_MEMORY
8676 case VM_PAGE_ON_SECLUDED_Q:
8677 {
8678 vm_page_queue_remove(&vm_page_queue_secluded, mem, vmp_pageq);
8679 vm_page_secluded_count--;
8680 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
8681 if (m_object == VM_OBJECT_NULL) {
8682 vm_page_secluded_count_free--;
8683 was_pageable = FALSE;
8684 } else {
8685 assert(!m_object->internal);
8686 vm_page_secluded_count_inuse--;
8687 was_pageable = FALSE;
8688 // was_pageable = TRUE;
8689 }
8690 break;
8691 }
8692 #endif /* CONFIG_SECLUDED_MEMORY */
8693
8694 default:
8695 {
8696 /*
8697 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
8698 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
8699 * the caller is responsible for determining if the page is on that queue, and if so, must
8700 * either first remove it (it needs both the page queues lock and the object lock to do
8701 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
8702 *
8703 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8704 * or any of the undefined states
8705 */
8706 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
8707 break;
8708 }
8709 }
8710 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8711 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
8712
8713 #if CONFIG_BACKGROUND_QUEUE
8714 if (remove_from_backgroundq == TRUE) {
8715 vm_page_remove_from_backgroundq(mem);
8716 }
8717 #endif
8718 if (was_pageable) {
8719 if (m_object->internal) {
8720 vm_page_pageable_internal_count--;
8721 } else {
8722 vm_page_pageable_external_count--;
8723 }
8724 }
8725 }
8726
8727 void
8728 vm_page_remove_internal(vm_page_t page)
8729 {
8730 vm_object_t __object = VM_PAGE_OBJECT(page);
8731 if (page == __object->memq_hint) {
8732 vm_page_t __new_hint;
8733 vm_page_queue_entry_t __qe;
8734 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
8735 if (vm_page_queue_end(&__object->memq, __qe)) {
8736 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
8737 if (vm_page_queue_end(&__object->memq, __qe)) {
8738 __qe = NULL;
8739 }
8740 }
8741 __new_hint = (vm_page_t)((uintptr_t) __qe);
8742 __object->memq_hint = __new_hint;
8743 }
8744 vm_page_queue_remove(&__object->memq, page, vmp_listq);
8745 #if CONFIG_SECLUDED_MEMORY
8746 if (__object->eligible_for_secluded) {
8747 vm_page_secluded.eligible_for_secluded--;
8748 }
8749 #endif /* CONFIG_SECLUDED_MEMORY */
8750 }
8751
8752 void
8753 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8754 {
8755 vm_object_t m_object;
8756
8757 m_object = VM_PAGE_OBJECT(mem);
8758
8759 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8760 assert(!mem->vmp_fictitious);
8761 assert(!mem->vmp_laundry);
8762 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8763 vm_page_check_pageable_safe(mem);
8764
8765 if (m_object->internal) {
8766 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8767
8768 if (first == TRUE) {
8769 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vmp_pageq);
8770 } else {
8771 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vmp_pageq);
8772 }
8773
8774 vm_page_anonymous_count++;
8775 vm_page_pageable_internal_count++;
8776 } else {
8777 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8778
8779 if (first == TRUE) {
8780 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vmp_pageq);
8781 } else {
8782 vm_page_queue_enter(&vm_page_queue_inactive, mem, vmp_pageq);
8783 }
8784
8785 vm_page_pageable_external_count++;
8786 }
8787 vm_page_inactive_count++;
8788 token_new_pagecount++;
8789
8790 #if CONFIG_BACKGROUND_QUEUE
8791 if (mem->vmp_in_background) {
8792 vm_page_add_to_backgroundq(mem, FALSE);
8793 }
8794 #endif
8795 }
8796
8797 void
8798 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8799 {
8800 vm_object_t m_object;
8801
8802 m_object = VM_PAGE_OBJECT(mem);
8803
8804 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8805 assert(!mem->vmp_fictitious);
8806 assert(!mem->vmp_laundry);
8807 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8808 vm_page_check_pageable_safe(mem);
8809
8810 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
8811 if (first == TRUE) {
8812 vm_page_queue_enter_first(&vm_page_queue_active, mem, vmp_pageq);
8813 } else {
8814 vm_page_queue_enter(&vm_page_queue_active, mem, vmp_pageq);
8815 }
8816 vm_page_active_count++;
8817
8818 if (m_object->internal) {
8819 vm_page_pageable_internal_count++;
8820 } else {
8821 vm_page_pageable_external_count++;
8822 }
8823
8824 #if CONFIG_BACKGROUND_QUEUE
8825 if (mem->vmp_in_background) {
8826 vm_page_add_to_backgroundq(mem, FALSE);
8827 }
8828 #endif
8829 vm_page_balance_inactive(3);
8830 }
8831
8832 /*
8833 * Pages from special kernel objects shouldn't
8834 * be placed on pageable queues.
8835 */
8836 void
8837 vm_page_check_pageable_safe(vm_page_t page)
8838 {
8839 vm_object_t page_object;
8840
8841 page_object = VM_PAGE_OBJECT(page);
8842
8843 if (page_object == kernel_object) {
8844 panic("vm_page_check_pageable_safe: trying to add page " \
8845 "from kernel object (%p) to pageable queue", kernel_object);
8846 }
8847
8848 if (page_object == compressor_object) {
8849 panic("vm_page_check_pageable_safe: trying to add page " \
8850 "from compressor object (%p) to pageable queue", compressor_object);
8851 }
8852
8853 if (page_object == vm_submap_object) {
8854 panic("vm_page_check_pageable_safe: trying to add page " \
8855 "from submap object (%p) to pageable queue", vm_submap_object);
8856 }
8857 }
8858
8859 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8860 * wired page diagnose
8861 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8862
8863 #include <libkern/OSKextLibPrivate.h>
8864
8865 #define KA_SIZE(namelen, subtotalscount) \
8866 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8867
8868 #define KA_NAME(alloc) \
8869 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8870
8871 #define KA_NAME_LEN(alloc) \
8872 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
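/*
 * Layout note: a vm_allocation_site's human-readable name, when present, is
 * stored immediately after its subtotals[] array; KA_NAME() locates it and
 * KA_NAME_LEN() recovers its length from the bits packed into "flags".
 * KA_SIZE() is the total allocation size for a site with the given name
 * length and subtotal count (see kern_allocation_name_allocate() below).
 */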
8873
8874 vm_tag_t
8875 vm_tag_bt(void)
8876 {
8877 uintptr_t* frameptr;
8878 uintptr_t* frameptr_next;
8879 uintptr_t retaddr;
8880 uintptr_t kstackb, kstackt;
8881 const vm_allocation_site_t * site;
8882 thread_t cthread;
8883 kern_allocation_name_t name;
8884
8885 cthread = current_thread();
8886 if (__improbable(cthread == NULL)) {
8887 return VM_KERN_MEMORY_OSFMK;
8888 }
8889
8890 if ((name = thread_get_kernel_state(cthread)->allocation_name)) {
8891 if (!name->tag) {
8892 vm_tag_alloc(name);
8893 }
8894 return name->tag;
8895 }
8896
8897 kstackb = cthread->kernel_stack;
8898 kstackt = kstackb + kernel_stack_size;
8899
8900 /* Load stack frame pointer (EBP on x86) into frameptr */
8901 frameptr = __builtin_frame_address(0);
8902 site = NULL;
8903 while (frameptr != NULL) {
8904 /* Verify thread stack bounds */
8905 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) {
8906 break;
8907 }
8908
8909 /* Next frame pointer is pointed to by the previous one */
8910 frameptr_next = (uintptr_t*) *frameptr;
8911
8912 /* Pull return address from one spot above the frame pointer */
8913 retaddr = *(frameptr + 1);
8914
8915 #if defined(HAS_APPLE_PAC)
8916 retaddr = (uintptr_t) ptrauth_strip((void *)retaddr, ptrauth_key_return_address);
8917 #endif
8918
8919 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
8920 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top)) {
8921 site = OSKextGetAllocationSiteForCaller(retaddr);
8922 break;
8923 }
8924 frameptr = frameptr_next;
8925 }
8926
8927 return site ? site->tag : VM_KERN_MEMORY_NONE;
8928 }
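/*
 * To summarize the walk above: vm_tag_bt() follows the saved frame pointers
 * of the current kernel stack (bounds-checking each frame against the stack
 * limits and stripping pointer authentication from return addresses where
 * present) until it finds a return address outside the core kernel's own
 * text (including addresses in built-in kext text), then asks
 * OSKextGetAllocationSiteForCaller() for the allocation site registered by
 * that caller; if no such caller is found it returns VM_KERN_MEMORY_NONE.
 */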
8929
8930 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE / 64];
8931
8932 void
8933 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8934 {
8935 vm_tag_t tag;
8936 uint64_t avail;
8937 uint32_t idx;
8938 vm_allocation_site_t * prev;
8939
8940 if (site->tag) {
8941 return;
8942 }
8943
8944 idx = 0;
8945 while (TRUE) {
8946 avail = free_tag_bits[idx];
8947 if (avail) {
8948 tag = __builtin_clzll(avail);
8949 avail &= ~(1ULL << (63 - tag));
8950 free_tag_bits[idx] = avail;
8951 tag += (idx << 6);
8952 break;
8953 }
8954 idx++;
8955 if (idx >= ARRAY_COUNT(free_tag_bits)) {
8956 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++) {
8957 prev = vm_allocation_sites[idx];
8958 if (!prev) {
8959 continue;
8960 }
8961 if (!KA_NAME_LEN(prev)) {
8962 continue;
8963 }
8964 if (!prev->tag) {
8965 continue;
8966 }
8967 if (prev->total) {
8968 continue;
8969 }
8970 if (1 != prev->refcount) {
8971 continue;
8972 }
8973
8974 assert(idx == prev->tag);
8975 tag = idx;
8976 prev->tag = VM_KERN_MEMORY_NONE;
8977 *releasesiteP = prev;
8978 break;
8979 }
8980 if (idx >= ARRAY_COUNT(vm_allocation_sites)) {
8981 tag = VM_KERN_MEMORY_ANY;
8982 }
8983 break;
8984 }
8985 }
8986 site->tag = tag;
8987
8988 OSAddAtomic16(1, &site->refcount);
8989
8990 if (VM_KERN_MEMORY_ANY != tag) {
8991 vm_allocation_sites[tag] = site;
8992 }
8993
8994 if (tag > vm_allocation_tag_highest) {
8995 vm_allocation_tag_highest = tag;
8996 }
8997 }
8998
8999 static void
9000 vm_tag_free_locked(vm_tag_t tag)
9001 {
9002 uint64_t avail;
9003 uint32_t idx;
9004 uint64_t bit;
9005
9006 if (VM_KERN_MEMORY_ANY == tag) {
9007 return;
9008 }
9009
9010 idx = (tag >> 6);
9011 avail = free_tag_bits[idx];
9012 tag &= 63;
9013 bit = (1ULL << (63 - tag));
9014 assert(!(avail & bit));
9015 free_tag_bits[idx] = (avail | bit);
9016 }
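/*
 * Sketch of the encoding used by free_tag_bits[]: tag t is free when bit
 * (63 - (t & 63)) of word (t >> 6) is set, so, for example, freeing tag 70
 * sets bit 57 of word 1.  Keeping low-numbered tags in the high bits of
 * each word lets vm_tag_alloc_locked() find the lowest free tag in a word
 * with a single __builtin_clzll().
 */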
9017
9018 static void
9019 vm_tag_init(void)
9020 {
9021 vm_tag_t tag;
9022 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++) {
9023 vm_tag_free_locked(tag);
9024 }
9025
9026 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++) {
9027 vm_tag_free_locked(tag);
9028 }
9029 }
9030
9031 vm_tag_t
9032 vm_tag_alloc(vm_allocation_site_t * site)
9033 {
9034 vm_tag_t tag;
9035 vm_allocation_site_t * releasesite;
9036
9037 if (VM_TAG_BT & site->flags) {
9038 tag = vm_tag_bt();
9039 if (VM_KERN_MEMORY_NONE != tag) {
9040 return tag;
9041 }
9042 }
9043
9044 if (!site->tag) {
9045 releasesite = NULL;
9046 lck_spin_lock(&vm_allocation_sites_lock);
9047 vm_tag_alloc_locked(site, &releasesite);
9048 lck_spin_unlock(&vm_allocation_sites_lock);
9049 if (releasesite) {
9050 kern_allocation_name_release(releasesite);
9051 }
9052 }
9053
9054 return site->tag;
9055 }
9056
9057 void
9058 vm_tag_update_size(vm_tag_t tag, int64_t delta)
9059 {
9060 vm_allocation_site_t * allocation;
9061 uint64_t prior;
9062
9063 assert(VM_KERN_MEMORY_NONE != tag);
9064 assert(tag < VM_MAX_TAG_VALUE);
9065
9066 allocation = vm_allocation_sites[tag];
9067 assert(allocation);
9068
9069 if (delta < 0) {
9070 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
9071 }
9072 prior = OSAddAtomic64(delta, &allocation->total);
9073
9074 #if DEBUG || DEVELOPMENT
9075
9076 uint64_t new, peak;
9077 new = prior + delta;
9078 do {
9079 peak = allocation->peak;
9080 if (new <= peak) {
9081 break;
9082 }
9083 } while (!OSCompareAndSwap64(peak, new, &allocation->peak));
9084
9085 #endif /* DEBUG || DEVELOPMENT */
9086
9087 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) {
9088 return;
9089 }
9090
9091 if (!prior && !allocation->tag) {
9092 vm_tag_alloc(allocation);
9093 }
9094 }
9095
9096 void
9097 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
9098 {
9099 uint64_t prior;
9100
9101 if (delta < 0) {
9102 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
9103 }
9104 prior = OSAddAtomic64(delta, &allocation->total);
9105
9106 #if DEBUG || DEVELOPMENT
9107
9108 uint64_t new, peak;
9109 new = prior + delta;
9110 do {
9111 peak = allocation->peak;
9112 if (new <= peak) {
9113 break;
9114 }
9115 } while (!OSCompareAndSwap64(peak, new, &allocation->peak));
9116
9117 #endif /* DEBUG || DEVELOPMENT */
9118
9119 if (!prior && !allocation->tag) {
9120 vm_tag_alloc(allocation);
9121 }
9122 }
9123
9124 #if VM_MAX_TAG_ZONES
9125
9126 void
9127 vm_allocation_zones_init(void)
9128 {
9129 kern_return_t ret;
9130 vm_offset_t addr;
9131 vm_size_t size;
9132
9133 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *)
9134 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
9135
9136 ret = kernel_memory_allocate(kernel_map,
9137 &addr, round_page(size), 0,
9138 KMA_ZERO, VM_KERN_MEMORY_DIAG);
9139 assert(KERN_SUCCESS == ret);
9140
9141 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
9142 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *);
9143
9144 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
9145 // in vm_tag_update_zone_size() won't recurse
9146 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
9147 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
9148 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
9149 }
9150
9151 void
9152 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
9153 {
9154 vm_allocation_zone_total_t * zone;
9155
9156 assert(VM_KERN_MEMORY_NONE != tag);
9157 assert(tag < VM_MAX_TAG_VALUE);
9158
9159 if (zidx >= VM_MAX_TAG_ZONES) {
9160 return;
9161 }
9162
9163 zone = vm_allocation_zone_totals[tag];
9164 if (!zone) {
9165 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
9166 if (!zone) {
9167 return;
9168 }
9169 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
9170 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag])) {
9171 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
9172 }
9173 }
9174 }
9175
9176 void
9177 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
9178 {
9179 vm_allocation_zone_total_t * zone;
9180 uint32_t new;
9181
9182 assert(VM_KERN_MEMORY_NONE != tag);
9183 assert(tag < VM_MAX_TAG_VALUE);
9184
9185 if (zidx >= VM_MAX_TAG_ZONES) {
9186 return;
9187 }
9188
9189 zone = vm_allocation_zone_totals[tag];
9190 assert(zone);
9191 zone += zidx;
9192
9193 /* the zone is locked */
9194 if (delta < 0) {
9195 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
9196 zone->total += delta;
9197 } else {
9198 zone->total += delta;
9199 if (zone->total > zone->peak) {
9200 zone->peak = zone->total;
9201 }
9202 if (dwaste) {
9203 new = zone->waste;
9204 if (zone->wastediv < 65536) {
9205 zone->wastediv++;
9206 } else {
9207 new -= (new >> 16);
9208 }
9209 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
9210 assert(!ov);
9211 zone->waste = new;
9212 }
9213 }
9214 }
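/*
 * The waste accounting above maintains an approximate running average:
 * zone->waste accumulates per-allocation waste and zone->wastediv counts
 * samples up to 65536, after which each new sample first decays the
 * accumulator by 1/65536 (new -= new >> 16).  process_account() later
 * converts this to collectable bytes as (waste * total / elem_size) /
 * wastediv, i.e. the average waste per element scaled by the element count.
 */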
9215
9216 #endif /* VM_MAX_TAG_ZONES */
9217
9218 void
9219 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
9220 {
9221 kern_allocation_name_t other;
9222 struct vm_allocation_total * total;
9223 uint32_t subidx;
9224
9225 subidx = 0;
9226 assert(VM_KERN_MEMORY_NONE != subtag);
9227 lck_spin_lock(&vm_allocation_sites_lock);
9228 for (; subidx < allocation->subtotalscount; subidx++) {
9229 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag) {
9230 allocation->subtotals[subidx].tag = subtag;
9231 break;
9232 }
9233 if (subtag == allocation->subtotals[subidx].tag) {
9234 break;
9235 }
9236 }
9237 lck_spin_unlock(&vm_allocation_sites_lock);
9238 assert(subidx < allocation->subtotalscount);
9239 if (subidx >= allocation->subtotalscount) {
9240 return;
9241 }
9242
9243 total = &allocation->subtotals[subidx];
9244 other = vm_allocation_sites[subtag];
9245 assert(other);
9246
9247 if (delta < 0) {
9248 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
9249 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
9250 }
9251 OSAddAtomic64(delta, &other->mapped);
9252 OSAddAtomic64(delta, &total->total);
9253 }
9254
9255 const char *
9256 kern_allocation_get_name(kern_allocation_name_t allocation)
9257 {
9258 return KA_NAME(allocation);
9259 }
9260
9261 kern_allocation_name_t
9262 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
9263 {
9264 uint32_t namelen;
9265
9266 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
9267
9268 kern_allocation_name_t allocation;
9269 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
9270 bzero(allocation, KA_SIZE(namelen, subtotalscount));
9271
9272 allocation->refcount = 1;
9273 allocation->subtotalscount = subtotalscount;
9274 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
9275 strlcpy(KA_NAME(allocation), name, namelen + 1);
9276
9277 return allocation;
9278 }
9279
9280 void
9281 kern_allocation_name_release(kern_allocation_name_t allocation)
9282 {
9283 assert(allocation->refcount > 0);
9284 if (1 == OSAddAtomic16(-1, &allocation->refcount)) {
9285 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
9286 }
9287 }
9288
9289 vm_tag_t
9290 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
9291 {
9292 return vm_tag_alloc(allocation);
9293 }
9294
9295 #if !VM_TAG_ACTIVE_UPDATE
9296 static void
9297 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
9298 {
9299 if (!object->wired_page_count) {
9300 return;
9301 }
9302 if (object != kernel_object) {
9303 assert(object->wire_tag < num_info);
9304 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
9305 }
9306 }
9307
9308 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
9309 unsigned int num_info, vm_object_t object);
9310
9311 static void
9312 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
9313 vm_page_iterate_proc proc, purgeable_q_t queue,
9314 int group)
9315 {
9316 vm_object_t object;
9317
9318 for (object = (vm_object_t) queue_first(&queue->objq[group]);
9319 !queue_end(&queue->objq[group], (queue_entry_t) object);
9320 object = (vm_object_t) queue_next(&object->objq)) {
9321 proc(info, num_info, object);
9322 }
9323 }
9324
9325 static void
9326 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
9327 vm_page_iterate_proc proc)
9328 {
9329 vm_object_t object;
9330
9331 lck_spin_lock_grp(&vm_objects_wired_lock, &vm_page_lck_grp_bucket);
9332 queue_iterate(&vm_objects_wired,
9333 object,
9334 vm_object_t,
9335 wired_objq)
9336 {
9337 proc(info, num_info, object);
9338 }
9339 lck_spin_unlock(&vm_objects_wired_lock);
9340 }
9341 #endif /* ! VM_TAG_ACTIVE_UPDATE */
9342
9343 static uint64_t
9344 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
9345 {
9346 size_t namelen;
9347 unsigned int idx, count, nextinfo;
9348 vm_allocation_site_t * site;
9349 lck_spin_lock(&vm_allocation_sites_lock);
9350
9351 for (idx = 0; idx <= vm_allocation_tag_highest; idx++) {
9352 site = vm_allocation_sites[idx];
9353 if (!site) {
9354 continue;
9355 }
9356 info[idx].mapped = site->mapped;
9357 info[idx].tag = site->tag;
9358 if (!iterated) {
9359 info[idx].size = site->total;
9360 #if DEBUG || DEVELOPMENT
9361 info[idx].peak = site->peak;
9362 #endif /* DEBUG || DEVELOPMENT */
9363 } else {
9364 if (!site->subtotalscount && (site->total != info[idx].size)) {
9365 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
9366 info[idx].size = site->total;
9367 }
9368 }
9369 info[idx].flags |= VM_KERN_SITE_WIRED;
9370 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC) {
9371 info[idx].site = idx;
9372 info[idx].flags |= VM_KERN_SITE_TAG;
9373 if (VM_KERN_MEMORY_ZONE == idx) {
9374 info[idx].flags |= VM_KERN_SITE_HIDE;
9375 info[idx].flags &= ~VM_KERN_SITE_WIRED;
9376 info[idx].collectable_bytes = zones_collectable_bytes;
9377 }
9378 } else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT)))) {
9379 info[idx].site = 0;
9380 info[idx].flags |= VM_KERN_SITE_NAMED;
9381 if (namelen > sizeof(info[idx].name)) {
9382 namelen = sizeof(info[idx].name);
9383 }
9384 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
9385 } else if (VM_TAG_KMOD & site->flags) {
9386 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
9387 info[idx].flags |= VM_KERN_SITE_KMOD;
9388 } else {
9389 info[idx].site = VM_KERNEL_UNSLIDE(site);
9390 info[idx].flags |= VM_KERN_SITE_KERNEL;
9391 }
9392 }
9393
9394 nextinfo = (vm_allocation_tag_highest + 1);
9395 count = nextinfo;
9396 if (count >= num_info) {
9397 count = num_info;
9398 }
9399
9400 for (idx = 0; idx < count; idx++) {
9401 site = vm_allocation_sites[idx];
9402 if (!site) {
9403 continue;
9404 }
9405 #if VM_MAX_TAG_ZONES
9406 vm_allocation_zone_total_t * zone;
9407 unsigned int zidx;
9408 vm_size_t elem_size;
9409
9410 if (vm_allocation_zone_totals
9411 && (zone = vm_allocation_zone_totals[idx])
9412 && (nextinfo < num_info)) {
9413 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
9414 if (!zone[zidx].peak) {
9415 continue;
9416 }
9417 info[nextinfo] = info[idx];
9418 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
9419 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
9420 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
9421 info[nextinfo].size = zone[zidx].total;
9422 info[nextinfo].peak = zone[zidx].peak;
9423 info[nextinfo].mapped = 0;
9424 if (zone[zidx].wastediv) {
9425 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
9426 }
9427 nextinfo++;
9428 }
9429 }
9430 #endif /* VM_MAX_TAG_ZONES */
9431 if (site->subtotalscount) {
9432 uint64_t mapped, mapcost, take;
9433 uint32_t sub;
9434 vm_tag_t alloctag;
9435
9436 info[idx].size = site->total;
9437 mapped = info[idx].size;
9438 info[idx].mapped = mapped;
9439 mapcost = 0;
9440 for (sub = 0; sub < site->subtotalscount; sub++) {
9441 alloctag = site->subtotals[sub].tag;
9442 assert(alloctag < num_info);
9443 if (info[alloctag].name[0]) {
9444 continue;
9445 }
9446 take = site->subtotals[sub].total;
9447 if (take > info[alloctag].size) {
9448 take = info[alloctag].size;
9449 }
9450 if (take > mapped) {
9451 take = mapped;
9452 }
9453 info[alloctag].mapped -= take;
9454 info[alloctag].size -= take;
9455 mapped -= take;
9456 mapcost += take;
9457 }
9458 info[idx].size = mapcost;
9459 }
9460 }
9461 lck_spin_unlock(&vm_allocation_sites_lock);
9462
9463 return 0;
9464 }
9465
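/*
 * Upper-bound estimate of how many mach_memory_info_t records
 * vm_page_diagnose() will need: one per live allocation site, one per zone
 * with a recorded peak for that site (when VM_MAX_TAG_ZONES), the fixed
 * VM_KERN_COUNTER_COUNT entries, plus a little slop for tags created after
 * the estimate.
 */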
9466 uint32_t
9467 vm_page_diagnose_estimate(void)
9468 {
9469 vm_allocation_site_t * site;
9470 uint32_t count;
9471 uint32_t idx;
9472
9473 lck_spin_lock(&vm_allocation_sites_lock);
9474 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++) {
9475 site = vm_allocation_sites[idx];
9476 if (!site) {
9477 continue;
9478 }
9479 count++;
9480 #if VM_MAX_TAG_ZONES
9481 if (vm_allocation_zone_totals) {
9482 vm_allocation_zone_total_t * zone;
9483 zone = vm_allocation_zone_totals[idx];
9484 if (!zone) {
9485 continue;
9486 }
9487 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
9488 if (zone[zidx].peak) {
9489 count++;
9490 }
9491 }
9492 }
9493 #endif
9494 }
9495 lck_spin_unlock(&vm_allocation_sites_lock);
9496
9497 /* some slop for new tags created */
9498 count += 8;
9499 count += VM_KERN_COUNTER_COUNT;
9500
9501 return count;
9502 }
9503
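/*
 * Fill `info` with a snapshot of kernel memory accounting.  The fixed
 * counters (managed/wired/reserved/stolen/... and the kernel, zone and
 * kalloc map sizes) are written at the tail of the array.  When tags are not
 * actively updated, wired pages are counted by iterating the wired-object
 * queue and by walking kernel_object-backed entries of kernel_map (one level
 * of submap deep).  Per-tag site data is then merged in by process_account().
 */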
9504 kern_return_t
9505 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
9506 {
9507 uint64_t wired_size;
9508 uint64_t wired_managed_size;
9509 uint64_t wired_reserved_size;
9510 boolean_t iterate;
9511 mach_memory_info_t * counts;
9512
9513 bzero(info, num_info * sizeof(mach_memory_info_t));
9514
9515 if (!vm_page_wire_count_initial) {
9516 return KERN_ABORTED;
9517 }
9518
9519 #if CONFIG_EMBEDDED
9520 wired_size = ptoa_64(vm_page_wire_count);
9521 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
9522 #else
9523 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
9524 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
9525 #endif
9526 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
9527
9528 wired_size += booter_size;
9529
9530 assert(num_info >= VM_KERN_COUNTER_COUNT);
9531 num_info -= VM_KERN_COUNTER_COUNT;
9532 counts = &info[num_info];
9533
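/*
 * Counter entries occupy the tail of the info array, indexed by the
 * VM_KERN_COUNT_* values and flagged VM_KERN_SITE_COUNTER so they can be
 * told apart from tag entries.
 */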
9534 #define SET_COUNT(xcount, xsize, xflags) \
9535 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
9536 counts[xcount].site = (xcount); \
9537 counts[xcount].size = (xsize); \
9538 counts[xcount].mapped = (xsize); \
9539 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
9540
9541 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
9542 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
9543 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
9544 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
9545 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
9546 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
9547 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
9548 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
9549
9550 #define SET_MAP(xcount, xsize, xfree, xlargest) \
9551 counts[xcount].site = (xcount); \
9552 counts[xcount].size = (xsize); \
9553 counts[xcount].mapped = (xsize); \
9554 counts[xcount].free = (xfree); \
9555 counts[xcount].largest = (xlargest); \
9556 counts[xcount].flags = VM_KERN_SITE_COUNTER;
9557
9558 vm_map_size_t map_size, map_free, map_largest;
9559
9560 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
9561 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
9562
9563 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
9564 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
9565
9566 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
9567 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
9568
9569 iterate = !VM_TAG_ACTIVE_UPDATE;
9570 if (iterate) {
9571 enum { kMaxKernelDepth = 1 };
9572 vm_map_t maps[kMaxKernelDepth];
9573 vm_map_entry_t entries[kMaxKernelDepth];
9574 vm_map_t map;
9575 vm_map_entry_t entry;
9576 vm_object_offset_t offset;
9577 vm_page_t page;
9578 int stackIdx, count;
9579
9580 #if !VM_TAG_ACTIVE_UPDATE
9581 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
9582 #endif /* ! VM_TAG_ACTIVE_UPDATE */
9583
9584 map = kernel_map;
9585 stackIdx = 0;
9586 while (map) {
9587 vm_map_lock(map);
9588 for (entry = map->hdr.links.next; map; entry = entry->links.next) {
9589 if (entry->is_sub_map) {
9590 assert(stackIdx < kMaxKernelDepth);
9591 maps[stackIdx] = map;
9592 entries[stackIdx] = entry;
9593 stackIdx++;
9594 map = VME_SUBMAP(entry);
9595 entry = NULL;
9596 break;
9597 }
9598 if (VME_OBJECT(entry) == kernel_object) {
9599 count = 0;
9600 vm_object_lock(VME_OBJECT(entry));
9601 for (offset = entry->links.start; offset < entry->links.end; offset += page_size) {
9602 page = vm_page_lookup(VME_OBJECT(entry), offset);
9603 if (page && VM_PAGE_WIRED(page)) {
9604 count++;
9605 }
9606 }
9607 vm_object_unlock(VME_OBJECT(entry));
9608
9609 if (count) {
9610 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
9611 assert(VME_ALIAS(entry) < num_info);
9612 info[VME_ALIAS(entry)].size += ptoa_64(count);
9613 }
9614 }
9615 while (map && (entry == vm_map_last_entry(map))) {
9616 vm_map_unlock(map);
9617 if (!stackIdx) {
9618 map = NULL;
9619 } else {
9620 --stackIdx;
9621 map = maps[stackIdx];
9622 entry = entries[stackIdx];
9623 }
9624 }
9625 }
9626 }
9627 }
9628
9629 process_account(info, num_info, zones_collectable_bytes, iterate);
9630
9631 return KERN_SUCCESS;
9632 }
9633
9634 #if DEBUG || DEVELOPMENT
9635
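/*
 * DEBUG/DEVELOPMENT helper: report the size and tag of the kernel allocation
 * starting at `addr`.  Zone elements are answered by zone_element_info();
 * otherwise kernel_map (descending one level of submap) is searched for an
 * entry whose start matches `addr`.
 */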
9636 kern_return_t
9637 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
9638 {
9639 kern_return_t ret;
9640 vm_size_t zsize;
9641 vm_map_t map;
9642 vm_map_entry_t entry;
9643
9644 zsize = zone_element_info((void *) addr, tag);
9645 if (zsize) {
9646 *zone_size = *size = zsize;
9647 return KERN_SUCCESS;
9648 }
9649
9650 *zone_size = 0;
9651 ret = KERN_INVALID_ADDRESS;
9652 for (map = kernel_map; map;) {
9653 vm_map_lock(map);
9654 if (!vm_map_lookup_entry(map, addr, &entry)) {
9655 break;
9656 }
9657 if (entry->is_sub_map) {
9658 if (map != kernel_map) {
9659 break;
9660 }
9661 map = VME_SUBMAP(entry);
9662 continue;
9663 }
9664 if (entry->vme_start != addr) {
9665 break;
9666 }
9667 *tag = VME_ALIAS(entry);
9668 *size = (entry->vme_end - addr);
9669 ret = KERN_SUCCESS;
9670 break;
9671 }
9672 if (map != kernel_map) {
9673 vm_map_unlock(map);
9674 }
9675 vm_map_unlock(kernel_map);
9676
9677 return ret;
9678 }
9679
9680 #endif /* DEBUG || DEVELOPMENT */
9681
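/*
 * If `tag` belongs to a kext allocation site, return its kmod ID and
 * optionally copy the kext name into the supplied buffer; returns 0 for
 * non-kext tags.
 */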
9682 uint32_t
9683 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
9684 {
9685 vm_allocation_site_t * site;
9686 uint32_t kmodId;
9687
9688 kmodId = 0;
9689 lck_spin_lock(&vm_allocation_sites_lock);
9690 if ((site = vm_allocation_sites[tag])) {
9691 if (VM_TAG_KMOD & site->flags) {
9692 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
9693 }
9694 }
9695 lck_spin_unlock(&vm_allocation_sites_lock);
9696
9697 return kmodId;
9698 }
9699
9700
9701 #if CONFIG_SECLUDED_MEMORY
9702 /*
9703 * Note that there's no locking around other accesses to vm_page_secluded_target.
9704 * That should be OK, since these are the only places where it can be changed after
9705 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
9706 * but will eventually get the correct value. This brief mismatch is OK as pageout
9707 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
9708 * over time.
9709 */
9710 unsigned int vm_page_secluded_suppress_cnt = 0;
9711 unsigned int vm_page_secluded_save_target;
9712
9713
9714 lck_grp_attr_t secluded_suppress_slock_grp_attr;
9715 lck_grp_t secluded_suppress_slock_grp;
9716 lck_attr_t secluded_suppress_slock_attr;
9717 lck_spin_t secluded_suppress_slock;
9718
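/*
 * Set up the spin lock that serializes start/stop of secluded-memory
 * suppression, keeping the suppression count and the saved target in sync.
 */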
9719 void
9720 secluded_suppression_init(void)
9721 {
9722 lck_grp_attr_setdefault(&secluded_suppress_slock_grp_attr);
9723 lck_grp_init(&secluded_suppress_slock_grp,
9724 "secluded_suppress_slock", &secluded_suppress_slock_grp_attr);
9725 lck_attr_setdefault(&secluded_suppress_slock_attr);
9726 lck_spin_init(&secluded_suppress_slock,
9727 &secluded_suppress_slock_grp, &secluded_suppress_slock_attr);
9728 }
9729
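/*
 * Suppress use of the secluded pool on behalf of `task`: the first suppressor
 * saves the current vm_page_secluded_target and forces it to 0; later
 * requests only bump vm_page_secluded_suppress_cnt.
 */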
9730 void
9731 start_secluded_suppression(task_t task)
9732 {
9733 if (task->task_suppressed_secluded) {
9734 return;
9735 }
9736 lck_spin_lock(&secluded_suppress_slock);
9737 if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
9738 task->task_suppressed_secluded = TRUE;
9739 vm_page_secluded_save_target = vm_page_secluded_target;
9740 vm_page_secluded_target = 0;
9741 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
9742 }
9743 lck_spin_unlock(&secluded_suppress_slock);
9744 }
9745
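/*
 * Undo a task's suppression request; when the last suppressor goes away the
 * saved secluded target is restored.
 */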
9746 void
9747 stop_secluded_suppression(task_t task)
9748 {
9749 lck_spin_lock(&secluded_suppress_slock);
9750 if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
9751 task->task_suppressed_secluded = FALSE;
9752 vm_page_secluded_target = vm_page_secluded_save_target;
9753 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
9754 }
9755 lck_spin_unlock(&secluded_suppress_slock);
9756 }
9757
9758 #endif /* CONFIG_SECLUDED_MEMORY */