1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93 #include <san/kasan.h>
94
95 #include <vm/vm_protos.h>
96 #include <vm/memory_object.h>
97 #include <vm/vm_purgeable_internal.h>
98 #include <vm/vm_compressor.h>
99
100 #if CONFIG_PHANTOM_CACHE
101 #include <vm/vm_phantom_cache.h>
102 #endif
103
104 #include <IOKit/IOHibernatePrivate.h>
105
106 #include <sys/kdebug.h>
107
108
109 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113
114 #if CONFIG_SECLUDED_MEMORY
115 struct vm_page_secluded_data vm_page_secluded;
116 #endif /* CONFIG_SECLUDED_MEMORY */
117
118 boolean_t hibernate_cleaning_in_progress = FALSE;
119 boolean_t vm_page_free_verify = TRUE;
120
121 uint32_t vm_lopage_free_count = 0;
122 uint32_t vm_lopage_free_limit = 0;
123 uint32_t vm_lopage_lowater = 0;
124 boolean_t vm_lopage_refill = FALSE;
125 boolean_t vm_lopage_needed = FALSE;
126
127 lck_mtx_ext_t vm_page_queue_lock_ext;
128 lck_mtx_ext_t vm_page_queue_free_lock_ext;
129 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
130
131 int speculative_age_index = 0;
132 int speculative_steal_index = 0;
133 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
134
135
136 __private_extern__ void vm_page_init_lck_grp(void);
137
138 static void vm_page_free_prepare(vm_page_t page);
139 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
140
141 static void vm_tag_init(void);
142
143 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
144 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
145 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
146
147 /*
148 * Associated with each page of user-allocatable memory is a
149 * page structure.
150 */
151
152 /*
153 * These variables record the values returned by vm_page_bootstrap,
154 * for debugging purposes. The implementation of pmap_steal_memory
155 * and pmap_startup here also uses them internally.
156 */
157
158 vm_offset_t virtual_space_start;
159 vm_offset_t virtual_space_end;
160 uint32_t vm_page_pages;
161
162 /*
163 * The vm_page_lookup() routine, which provides for fast
164 * (virtual memory object, offset) to page lookup, employs
165 * the following hash table. The vm_page_{insert,remove}
166 * routines install and remove associations in the table.
167 * [This table is often called the virtual-to-physical,
168 * or VP, table.]
169 */
170 typedef struct {
171 vm_page_packed_t page_list;
172 #if MACH_PAGE_HASH_STATS
173 int cur_count; /* current count */
174 int hi_count; /* high water mark */
175 #endif /* MACH_PAGE_HASH_STATS */
176 } vm_page_bucket_t;
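/*
 * Each bucket heads a singly-linked list of pages chained through
 * their packed next_m fields.  A lookup in this table is, in rough
 * sketch (see vm_page_remove() and vm_page_replace() below for the
 * real traversals, done with the bucket's spin lock held), a walk of
 * that chain comparing the packed object pointer and the offset:
 *
 *	m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
 *	while (m && (m->vm_page_object != VM_PAGE_PACK_OBJECT(object) ||
 *	             m->offset != offset))
 *		m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m));
 */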
177
178
179 #define BUCKETS_PER_LOCK 16
180
181 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
182 unsigned int vm_page_bucket_count = 0; /* How big is array? */
183 unsigned int vm_page_hash_mask; /* Mask for hash function */
184 unsigned int vm_page_hash_shift; /* Shift for hash function */
185 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
186 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
187
188 #ifndef VM_TAG_ACTIVE_UPDATE
189 #error VM_TAG_ACTIVE_UPDATE
190 #endif
191 #ifndef VM_MAX_TAG_ZONES
192 #error VM_MAX_TAG_ZONES
193 #endif
194
195 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
196 lck_spin_t *vm_page_bucket_locks;
197 lck_spin_t vm_objects_wired_lock;
198 lck_spin_t vm_allocation_sites_lock;
199
200 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
201 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
202 #if VM_MAX_TAG_ZONES
203 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
204 #endif /* VM_MAX_TAG_ZONES */
205
206 vm_tag_t vm_allocation_tag_highest;
207
208 #if VM_PAGE_BUCKETS_CHECK
209 boolean_t vm_page_buckets_check_ready = FALSE;
210 #if VM_PAGE_FAKE_BUCKETS
211 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
212 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
213 #endif /* VM_PAGE_FAKE_BUCKETS */
214 #endif /* VM_PAGE_BUCKETS_CHECK */
215
216
217
218 #if MACH_PAGE_HASH_STATS
219 /* This routine is for debugging only. It is intended to be called by
220 * hand by a developer using a kernel debugger. This routine prints
221 * out vm_page_hash table statistics to the kernel debug console.
222 */
223 void
224 hash_debug(void)
225 {
226 int i;
227 int numbuckets = 0;
228 int highsum = 0;
229 int maxdepth = 0;
230
231 for (i = 0; i < vm_page_bucket_count; i++) {
232 if (vm_page_buckets[i].hi_count) {
233 numbuckets++;
234 highsum += vm_page_buckets[i].hi_count;
235 if (vm_page_buckets[i].hi_count > maxdepth)
236 maxdepth = vm_page_buckets[i].hi_count;
237 }
238 }
239 printf("Total number of buckets: %d\n", vm_page_bucket_count);
240 printf("Number used buckets: %d = %d%%\n",
241 numbuckets, 100*numbuckets/vm_page_bucket_count);
242 printf("Number unused buckets: %d = %d%%\n",
243 vm_page_bucket_count - numbuckets,
244 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
245 printf("Sum of bucket max depth: %d\n", highsum);
246 printf("Average bucket depth: %d.%2d\n",
247 highsum/vm_page_bucket_count,
248 highsum%vm_page_bucket_count);
249 printf("Maximum bucket depth: %d\n", maxdepth);
250 }
251 #endif /* MACH_PAGE_HASH_STATS */
252
253 /*
254 * The virtual page size is currently implemented as a runtime
255 * variable, but is constant once initialized using vm_set_page_size.
256 * This initialization must be done in the machine-dependent
257 * bootstrap sequence, before calling other machine-independent
258 * initializations.
259 *
260 * All references to the virtual page size outside this
261 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
262 * constants.
263 */
264 #if defined(__arm__) || defined(__arm64__)
265 vm_size_t page_size;
266 vm_size_t page_mask;
267 int page_shift;
268 #else
269 vm_size_t page_size = PAGE_SIZE;
270 vm_size_t page_mask = PAGE_MASK;
271 int page_shift = PAGE_SHIFT;
272 #endif
273
274 /*
275 * Resident page structures are initialized from
276 * a template (see vm_page_alloc).
277 *
278 * When adding a new field to the vm_page
279 * structure, be sure to add initialization
280 * (see vm_page_bootstrap).
281 */
282 struct vm_page vm_page_template;
283
284 vm_page_t vm_pages = VM_PAGE_NULL;
285 vm_page_t vm_page_array_beginning_addr;
286 vm_page_t vm_page_array_ending_addr;
287 vm_page_t vm_page_array_boundary;
288
289 unsigned int vm_pages_count = 0;
290 ppnum_t vm_page_lowest = 0;
291
292 /*
293 * Resident pages that represent real memory
294 * are allocated from a set of free lists,
295 * one per color.
296 */
297 unsigned int vm_colors;
298 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
299 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
300 unsigned int vm_free_magazine_refill_limit = 0;
301
302
303 struct vm_page_queue_free_head {
304 vm_page_queue_head_t qhead;
305 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
306
307 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
308
309
310 unsigned int vm_page_free_wanted;
311 unsigned int vm_page_free_wanted_privileged;
312 #if CONFIG_SECLUDED_MEMORY
313 unsigned int vm_page_free_wanted_secluded;
314 #endif /* CONFIG_SECLUDED_MEMORY */
315 unsigned int vm_page_free_count;
316
317 /*
318 * Occasionally, the virtual memory system uses
319 * resident page structures that do not refer to
320 * real pages, for example to leave a page with
321 * important state information in the VP table.
322 *
323 * These page structures are allocated the way
324 * most other kernel structures are.
325 */
326 zone_t vm_page_array_zone;
327 zone_t vm_page_zone;
328 vm_locks_array_t vm_page_locks;
329 decl_lck_mtx_data(,vm_page_alloc_lock)
330 lck_mtx_ext_t vm_page_alloc_lock_ext;
331
332 unsigned int io_throttle_zero_fill;
333
334 unsigned int vm_page_local_q_count = 0;
335 unsigned int vm_page_local_q_soft_limit = 250;
336 unsigned int vm_page_local_q_hard_limit = 500;
337 struct vplq *vm_page_local_q = NULL;
338
339 /* N.B. Guard and fictitious pages must not
340 * be assigned a zero phys_page value.
341 */
342 /*
343 * Fictitious pages don't have a physical address,
344 * but we must initialize phys_page to something.
345 * For debugging, this should be a strange value
346 * that the pmap module can recognize in assertions.
347 */
348 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
349
350 /*
351 * Guard pages are not accessible so they don't
352 * need a physical address, but we need to enter
353 * one in the pmap.
354 * Let's make it recognizable and make sure that
355 * we don't use a real physical page with that
356 * physical address.
357 */
358 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
359
360 /*
361 * Resident page structures are also chained on
362 * queues that are used by the page replacement
363 * system (pageout daemon). These queues are
364 * defined here, but are shared by the pageout
365 * module. The inactive queue is broken into
366 * file-backed and anonymous queues for convenience, as the
367 * pageout daemon often assigns a higher
368 * importance to anonymous pages (they are less likely to be picked).
369 */
370 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
371 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
372 #if CONFIG_SECLUDED_MEMORY
373 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374 #endif /* CONFIG_SECLUDED_MEMORY */
375 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
376 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
377
378 queue_head_t vm_objects_wired;
379
380 #if CONFIG_BACKGROUND_QUEUE
381 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
382 uint32_t vm_page_background_target;
383 uint32_t vm_page_background_count;
384 uint64_t vm_page_background_promoted_count;
385
386 uint32_t vm_page_background_internal_count;
387 uint32_t vm_page_background_external_count;
388
389 uint32_t vm_page_background_mode;
390 uint32_t vm_page_background_exclude_external;
391 #endif
392
393 unsigned int vm_page_active_count;
394 unsigned int vm_page_inactive_count;
395 #if CONFIG_SECLUDED_MEMORY
396 unsigned int vm_page_secluded_count;
397 unsigned int vm_page_secluded_count_free;
398 unsigned int vm_page_secluded_count_inuse;
399 #endif /* CONFIG_SECLUDED_MEMORY */
400 unsigned int vm_page_anonymous_count;
401 unsigned int vm_page_throttled_count;
402 unsigned int vm_page_speculative_count;
403
404 unsigned int vm_page_wire_count;
405 unsigned int vm_page_wire_count_on_boot = 0;
406 unsigned int vm_page_stolen_count;
407 unsigned int vm_page_wire_count_initial;
408 unsigned int vm_page_pages_initial;
409 unsigned int vm_page_gobble_count = 0;
410
411 #define VM_PAGE_WIRE_COUNT_WARNING 0
412 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
413
414 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
415 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
416 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
417
418 unsigned int vm_page_xpmapped_external_count = 0;
419 unsigned int vm_page_external_count = 0;
420 unsigned int vm_page_internal_count = 0;
421 unsigned int vm_page_pageable_external_count = 0;
422 unsigned int vm_page_pageable_internal_count = 0;
423
424 #if DEVELOPMENT || DEBUG
425 unsigned int vm_page_speculative_recreated = 0;
426 unsigned int vm_page_speculative_created = 0;
427 unsigned int vm_page_speculative_used = 0;
428 #endif
429
430 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
431
432 unsigned int vm_page_cleaned_count = 0;
433 unsigned int vm_pageout_enqueued_cleaned = 0;
434
435 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
436 ppnum_t max_valid_low_ppnum = 0xffffffff;
437
438
439 /*
440 * Several page replacement parameters are also
441 * shared with this module, so that page allocation
442 * (done here in vm_page_alloc) can trigger the
443 * pageout daemon.
444 */
445 unsigned int vm_page_free_target = 0;
446 unsigned int vm_page_free_min = 0;
447 unsigned int vm_page_throttle_limit = 0;
448 unsigned int vm_page_inactive_target = 0;
449 #if CONFIG_SECLUDED_MEMORY
450 unsigned int vm_page_secluded_target = 0;
451 #endif /* CONFIG_SECLUDED_MEMORY */
452 unsigned int vm_page_anonymous_min = 0;
453 unsigned int vm_page_inactive_min = 0;
454 unsigned int vm_page_free_reserved = 0;
455 unsigned int vm_page_throttle_count = 0;
456
457
458 /*
459 * The VM system has a couple of heuristics for deciding
460 * that pages are "uninteresting" and should be placed
461 * on the inactive queue as likely candidates for replacement.
462 * These variables let the heuristics be controlled at run-time
463 * to make experimentation easier.
464 */
465
466 boolean_t vm_page_deactivate_hint = TRUE;
467
468 struct vm_page_stats_reusable vm_page_stats_reusable;
469
470 /*
471 * vm_set_page_size:
472 *
473 * Sets the page size, perhaps based upon the memory
474 * size. Must be called before any use of page-size
475 * dependent functions.
476 *
477 * Sets page_shift and page_mask from page_size.
478 */
479 void
480 vm_set_page_size(void)
481 {
482 page_size = PAGE_SIZE;
483 page_mask = PAGE_MASK;
484 page_shift = PAGE_SHIFT;
485
486 if ((page_mask & page_size) != 0)
487 panic("vm_set_page_size: page size not a power of two");
488
489 for (page_shift = 0; ; page_shift++)
490 if ((1U << page_shift) == page_size)
491 break;
492 }
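/*
 * For example, with a 4 KB page size (PAGE_SIZE == 4096,
 * PAGE_MASK == 0xfff), the loop above terminates with
 * page_shift == 12, since 1 << 12 == 4096.  (Illustrative values
 * only; configurations with 16 KB pages end up with page_shift == 14.)
 */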
493
494 #if defined (__x86_64__)
495
496 #define MAX_CLUMP_SIZE 16
497 #define DEFAULT_CLUMP_SIZE 4
498
499 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
500
501 #if DEVELOPMENT || DEBUG
502 unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
503 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
504
505 static inline void vm_clump_update_stats(unsigned int c) {
506 assert(c<=vm_clump_size);
507 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
508 vm_clump_allocs+=c;
509 }
510 #endif /* if DEVELOPMENT || DEBUG */
511
512 /* Called once to set up the VM clump knobs */
513 static void
514 vm_page_setup_clump( void )
515 {
516 unsigned int override, n;
517
518 vm_clump_size = DEFAULT_CLUMP_SIZE;
519 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
520
521 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
522 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
523 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
524
525 vm_clump_promote_threshold = vm_clump_size;
526 vm_clump_mask = vm_clump_size - 1;
527 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
528
529 #if DEVELOPMENT || DEBUG
530 bzero(vm_clump_stats, sizeof(vm_clump_stats));
531 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
532 #endif /* if DEVELOPMENT || DEBUG */
533 }
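/*
 * Worked example, assuming no "clump_size" boot-arg so that
 * DEFAULT_CLUMP_SIZE (4) is used: vm_clump_size == 4,
 * vm_clump_mask == 3, vm_clump_shift == 2 (since 4 == 1 << 2), and
 * vm_clump_promote_threshold == 4.
 */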
534
535 #endif /* #if defined (__x86_64__) */
536
537 #define COLOR_GROUPS_TO_STEAL 4
538
539 /* Called once during startup, once the cache geometry is known.
540 */
541 static void
542 vm_page_set_colors( void )
543 {
544 unsigned int n, override;
545
546 #if defined (__x86_64__)
547 /* adjust #colors because we need to color outside the clump boundary */
548 vm_cache_geometry_colors >>= vm_clump_shift;
549 #endif
550 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
551 n = override;
552 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
553 n = vm_cache_geometry_colors;
554 else n = DEFAULT_COLORS; /* use default if all else fails */
555
556 if ( n == 0 )
557 n = 1;
558 if ( n > MAX_COLORS )
559 n = MAX_COLORS;
560
561 /* the count must be a power of 2 */
562 if ( ( n & (n - 1)) != 0 )
563 n = DEFAULT_COLORS; /* use default if all else fails */
564
565 vm_colors = n;
566 vm_color_mask = n - 1;
567
568 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
569
570 #if defined (__x86_64__)
571 /* adjust for reduction in colors due to clumping and multiple cores */
572 if (real_ncpus)
573 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
574 #endif
575 }
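/*
 * Worked example with assumed (illustrative) numbers on x86_64: if the
 * hardware reports vm_cache_geometry_colors == 32 and vm_clump_size == 4
 * (vm_clump_shift == 2), the clump adjustment leaves n == 8, so
 * vm_colors == 8 and vm_color_mask == 7.  vm_free_magazine_refill_limit
 * starts at 8 * COLOR_GROUPS_TO_STEAL == 32 and, with real_ncpus == 8,
 * is scaled to 32 * (4 * 8) == 1024 pages.
 */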
576
577
578 lck_grp_t vm_page_lck_grp_free;
579 lck_grp_t vm_page_lck_grp_queue;
580 lck_grp_t vm_page_lck_grp_local;
581 lck_grp_t vm_page_lck_grp_purge;
582 lck_grp_t vm_page_lck_grp_alloc;
583 lck_grp_t vm_page_lck_grp_bucket;
584 lck_grp_attr_t vm_page_lck_grp_attr;
585 lck_attr_t vm_page_lck_attr;
586
587
588 __private_extern__ void
589 vm_page_init_lck_grp(void)
590 {
591 /*
592 * initialize the vm_page lock world
593 */
594 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
595 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
596 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
601 lck_attr_setdefault(&vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
603
604 vm_compressor_init_locks();
605 }
606
607 #define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
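/*
 * ROUNDUP_NEXTP2(X) yields the smallest power of two that is >= X
 * (for X >= 2): __builtin_clz(X - 1) counts the leading zero bits of
 * X - 1.  For example, ROUNDUP_NEXTP2(48) == 64 and
 * ROUNDUP_NEXTP2(64) == 64.
 */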
608
609 void
610 vm_page_init_local_q()
611 {
612 unsigned int num_cpus;
613 unsigned int i;
614 struct vplq *t_local_q;
615
616 num_cpus = ml_get_max_cpus();
617
618 /*
619 * no point in this for a uni-processor system
620 */
621 if (num_cpus >= 2) {
622 #if KASAN
623 /* KASAN breaks the expectation of a size-aligned object by adding a
624 * redzone, so explicitly align. */
625 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
626 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
627 #else
628 /* round the size up to the nearest power of two */
629 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
630 #endif
631
632 for (i = 0; i < num_cpus; i++) {
633 struct vpl *lq;
634
635 lq = &t_local_q[i].vpl_un.vpl;
636 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
637 vm_page_queue_init(&lq->vpl_queue);
638 lq->vpl_count = 0;
639 lq->vpl_internal_count = 0;
640 lq->vpl_external_count = 0;
641 }
642 vm_page_local_q_count = num_cpus;
643
644 vm_page_local_q = (struct vplq *)t_local_q;
645 }
646 }
647
648 /*
649 * vm_init_before_launchd
650 *
651 * This should be called right before launchd is loaded.
652 */
653 void
654 vm_init_before_launchd()
655 {
656 vm_page_wire_count_on_boot = vm_page_wire_count;
657 }
658
659
660 /*
661 * vm_page_bootstrap:
662 *
663 * Initializes the resident memory module.
664 *
665 * Allocates memory for the page cells, and
666 * for the object/offset-to-page hash table headers.
667 * Each page cell is initialized and placed on the free list.
668 * Returns the range of available kernel virtual memory.
669 */
670
671 void
672 vm_page_bootstrap(
673 vm_offset_t *startp,
674 vm_offset_t *endp)
675 {
676 vm_page_t m;
677 unsigned int i;
678 unsigned int log1;
679 unsigned int log2;
680 unsigned int size;
681
682 /*
683 * Initialize the vm_page template.
684 */
685
686 m = &vm_page_template;
687 bzero(m, sizeof (*m));
688
689 #if CONFIG_BACKGROUND_QUEUE
690 m->vm_page_backgroundq.next = 0;
691 m->vm_page_backgroundq.prev = 0;
692 m->vm_page_in_background = FALSE;
693 m->vm_page_on_backgroundq = FALSE;
694 #endif
695
696 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
697 m->listq.next = 0;
698 m->listq.prev = 0;
699 m->next_m = 0;
700
701 m->vm_page_object = 0; /* reset later */
702 m->offset = (vm_object_offset_t) -1; /* reset later */
703
704 m->wire_count = 0;
705 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
706 m->laundry = FALSE;
707 m->reference = FALSE;
708 m->gobbled = FALSE;
709 m->private = FALSE;
710 m->__unused_pageq_bits = 0;
711
712 #if !defined(__arm__) && !defined(__arm64__)
713 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
714 #endif
715 m->busy = TRUE;
716 m->wanted = FALSE;
717 m->tabled = FALSE;
718 m->hashed = FALSE;
719 m->fictitious = FALSE;
720 m->pmapped = FALSE;
721 m->wpmapped = FALSE;
722 m->free_when_done = FALSE;
723 m->absent = FALSE;
724 m->error = FALSE;
725 m->dirty = FALSE;
726 m->cleaning = FALSE;
727 m->precious = FALSE;
728 m->clustered = FALSE;
729 m->overwriting = FALSE;
730 m->restart = FALSE;
731 m->unusual = FALSE;
732 m->cs_validated = FALSE;
733 m->cs_tainted = FALSE;
734 m->cs_nx = FALSE;
735 m->no_cache = FALSE;
736 m->reusable = FALSE;
737 m->slid = FALSE;
738 m->xpmapped = FALSE;
739 m->written_by_kernel = FALSE;
740 m->__unused_object_bits = 0;
741
742 /*
743 * Initialize the page queues.
744 */
745 vm_page_init_lck_grp();
746
747 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
748 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
749 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
750
751 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
752 int group;
753
754 purgeable_queues[i].token_q_head = 0;
755 purgeable_queues[i].token_q_tail = 0;
756 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
757 queue_init(&purgeable_queues[i].objq[group]);
758
759 purgeable_queues[i].type = i;
760 purgeable_queues[i].new_pages = 0;
761 #if MACH_ASSERT
762 purgeable_queues[i].debug_count_tokens = 0;
763 purgeable_queues[i].debug_count_objects = 0;
764 #endif
765 };
766 purgeable_nonvolatile_count = 0;
767 queue_init(&purgeable_nonvolatile_queue);
768
769 for (i = 0; i < MAX_COLORS; i++ )
770 vm_page_queue_init(&vm_page_queue_free[i].qhead);
771
772 vm_page_queue_init(&vm_lopage_queue_free);
773 vm_page_queue_init(&vm_page_queue_active);
774 vm_page_queue_init(&vm_page_queue_inactive);
775 #if CONFIG_SECLUDED_MEMORY
776 vm_page_queue_init(&vm_page_queue_secluded);
777 #endif /* CONFIG_SECLUDED_MEMORY */
778 vm_page_queue_init(&vm_page_queue_cleaned);
779 vm_page_queue_init(&vm_page_queue_throttled);
780 vm_page_queue_init(&vm_page_queue_anonymous);
781 queue_init(&vm_objects_wired);
782
783 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
784 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
785
786 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
787 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
788 }
789 #if CONFIG_BACKGROUND_QUEUE
790 vm_page_queue_init(&vm_page_queue_background);
791
792 vm_page_background_count = 0;
793 vm_page_background_internal_count = 0;
794 vm_page_background_external_count = 0;
795 vm_page_background_promoted_count = 0;
796
797 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
798
799 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
800 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
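/*
 * i.e. the default background target is roughly 4% of physical pages,
 * subject to the VM_PAGE_BACKGROUND_TARGET_MAX cap and the boot-args
 * below.  Example with assumed numbers: 8 GB of RAM and 4 KB pages give
 * atop_64(max_mem) == 2097152, so the pre-cap target is
 * 2097152 / 25 == 83886 pages.
 */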
801
802 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
803 vm_page_background_exclude_external = 0;
804
805 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
806 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
807 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
808
809 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
810 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
811 #endif
812 vm_page_free_wanted = 0;
813 vm_page_free_wanted_privileged = 0;
814 #if CONFIG_SECLUDED_MEMORY
815 vm_page_free_wanted_secluded = 0;
816 #endif /* CONFIG_SECLUDED_MEMORY */
817
818 #if defined (__x86_64__)
819 /* this must be called before vm_page_set_colors() */
820 vm_page_setup_clump();
821 #endif
822
823 vm_page_set_colors();
824
825 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
826 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
827 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
828 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
829
830 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
831 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
832 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
834 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
835 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
836 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
837 #if CONFIG_SECLUDED_MEMORY
838 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
839 #endif /* CONFIG_SECLUDED_MEMORY */
840
841 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
842 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
844 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
846 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
847 #if CONFIG_SECLUDED_MEMORY
848 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
849 #endif /* CONFIG_SECLUDED_MEMORY */
850
851 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
852 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
853 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
854 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
855 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
856 #if CONFIG_SECLUDED_MEMORY
857 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
858 #endif /* CONFIG_SECLUDED_MEMORY */
859
860 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
861 {
862 vm_allocation_sites_static[i].refcount = 2;
863 vm_allocation_sites_static[i].tag = i;
864 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
865 }
866 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
867 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
868 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
869
870 /*
871 * Steal memory for the map and zone subsystems.
872 */
873 #if CONFIG_GZALLOC
874 gzalloc_configure();
875 #endif
876 kernel_debug_string_early("vm_map_steal_memory");
877 vm_map_steal_memory();
878
879 /*
880 * Allocate (and initialize) the virtual-to-physical
881 * table hash buckets.
882 *
883 * The number of buckets should be a power of two to
884 * get a good hash function. The following computation
885 * chooses the first power of two that is at least
886 * as large as the number of physical pages in the system.
887 */
888
889 if (vm_page_bucket_count == 0) {
890 unsigned int npages = pmap_free_pages();
891
892 vm_page_bucket_count = 1;
893 while (vm_page_bucket_count < npages)
894 vm_page_bucket_count <<= 1;
895 }
896 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
897
898 vm_page_hash_mask = vm_page_bucket_count - 1;
899
900 /*
901 * Calculate object shift value for hashing algorithm:
902 * O = log2(sizeof(struct vm_object))
903 * B = log2(vm_page_bucket_count)
904 * hash shifts the object left by
905 * B/2 - O + 1
906 */
907 size = vm_page_bucket_count;
908 for (log1 = 0; size > 1; log1++)
909 size /= 2;
910 size = sizeof(struct vm_object);
911 for (log2 = 0; size > 1; log2++)
912 size /= 2;
913 vm_page_hash_shift = log1/2 - log2 + 1;
914
915 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
916 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
917 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
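/*
 * Worked example with assumed sizes: for a bucket count of 2^20
 * (log1 == 20) and a struct vm_object of 256..511 bytes (log2 == 8),
 * vm_page_hash_shift == 20/2 - 8 + 1 == 3 and
 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
 */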
918
919 if (vm_page_hash_mask & vm_page_bucket_count)
920 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
921
922 #if VM_PAGE_BUCKETS_CHECK
923 #if VM_PAGE_FAKE_BUCKETS
924 /*
925 * Allocate a decoy set of page buckets, to detect
926 * any stomping there.
927 */
928 vm_page_fake_buckets = (vm_page_bucket_t *)
929 pmap_steal_memory(vm_page_bucket_count *
930 sizeof(vm_page_bucket_t));
931 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
932 vm_page_fake_buckets_end =
933 vm_map_round_page((vm_page_fake_buckets_start +
934 (vm_page_bucket_count *
935 sizeof (vm_page_bucket_t))),
936 PAGE_MASK);
937 char *cp;
938 for (cp = (char *)vm_page_fake_buckets_start;
939 cp < (char *)vm_page_fake_buckets_end;
940 cp++) {
941 *cp = 0x5a;
942 }
943 #endif /* VM_PAGE_FAKE_BUCKETS */
944 #endif /* VM_PAGE_BUCKETS_CHECK */
945
946 kernel_debug_string_early("vm_page_buckets");
947 vm_page_buckets = (vm_page_bucket_t *)
948 pmap_steal_memory(vm_page_bucket_count *
949 sizeof(vm_page_bucket_t));
950
951 kernel_debug_string_early("vm_page_bucket_locks");
952 vm_page_bucket_locks = (lck_spin_t *)
953 pmap_steal_memory(vm_page_bucket_lock_count *
954 sizeof(lck_spin_t));
955
956 for (i = 0; i < vm_page_bucket_count; i++) {
957 vm_page_bucket_t *bucket = &vm_page_buckets[i];
958
959 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
960 #if MACH_PAGE_HASH_STATS
961 bucket->cur_count = 0;
962 bucket->hi_count = 0;
963 #endif /* MACH_PAGE_HASH_STATS */
964 }
965
966 for (i = 0; i < vm_page_bucket_lock_count; i++)
967 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
968
969 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
970 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
971 vm_tag_init();
972
973 #if VM_PAGE_BUCKETS_CHECK
974 vm_page_buckets_check_ready = TRUE;
975 #endif /* VM_PAGE_BUCKETS_CHECK */
976
977 /*
978 * Machine-dependent code allocates the resident page table.
979 * It uses vm_page_init to initialize the page frames.
980 * The code also returns to us the virtual space available
981 * to the kernel. We don't trust the pmap module
982 * to get the alignment right.
983 */
984
985 kernel_debug_string_early("pmap_startup");
986 pmap_startup(&virtual_space_start, &virtual_space_end);
987 virtual_space_start = round_page(virtual_space_start);
988 virtual_space_end = trunc_page(virtual_space_end);
989
990 *startp = virtual_space_start;
991 *endp = virtual_space_end;
992
993 /*
994 * Compute the initial "wire" count.
995 * Up until now, the pages which have been set aside are not under
996 * the VM system's control, so although they aren't explicitly
997 * wired, they nonetheless can't be moved. At this moment,
998 * all VM managed pages are "free", courtesy of pmap_startup.
999 */
1000 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1001 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
1002 #if CONFIG_SECLUDED_MEMORY
1003 vm_page_wire_count -= vm_page_secluded_count;
1004 #endif
1005 vm_page_wire_count_initial = vm_page_wire_count;
1006 vm_page_pages_initial = vm_page_pages;
1007
1008 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1009 vm_page_free_count, vm_page_wire_count);
1010
1011 kernel_debug_string_early("vm_page_bootstrap complete");
1012 simple_lock_init(&vm_paging_lock, 0);
1013 }
1014
1015 #ifndef MACHINE_PAGES
1016 /*
1017 * We implement pmap_steal_memory and pmap_startup with the help
1018 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1019 */
1020
1021 void *
1022 pmap_steal_memory(
1023 vm_size_t size)
1024 {
1025 kern_return_t kr;
1026 vm_offset_t addr, vaddr;
1027 ppnum_t phys_page;
1028
1029 /*
1030 * We round the size up to a multiple of the pointer size.
1031 */
1032
1033 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1034
1035 /*
1036 * If this is the first call to pmap_steal_memory,
1037 * we have to initialize ourself.
1038 */
1039
1040 if (virtual_space_start == virtual_space_end) {
1041 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1042
1043 /*
1044 * The initial values must be aligned properly, and
1045 * we don't trust the pmap module to do it right.
1046 */
1047
1048 virtual_space_start = round_page(virtual_space_start);
1049 virtual_space_end = trunc_page(virtual_space_end);
1050 }
1051
1052 /*
1053 * Allocate virtual memory for this request.
1054 */
1055
1056 addr = virtual_space_start;
1057 virtual_space_start += size;
1058
1059 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1060
1061 /*
1062 * Allocate and map physical pages to back new virtual pages.
1063 */
1064
1065 for (vaddr = round_page(addr);
1066 vaddr < addr + size;
1067 vaddr += PAGE_SIZE) {
1068
1069 if (!pmap_next_page_hi(&phys_page))
1070 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1071
1072 /*
1073 * XXX Logically, these mappings should be wired,
1074 * but some pmap modules barf if they are.
1075 */
1076 #if defined(__LP64__)
1077 #ifdef __arm64__
1078 /* ARM64_TODO: verify that we really don't need this */
1079 #else
1080 pmap_pre_expand(kernel_pmap, vaddr);
1081 #endif
1082 #endif
1083
1084 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1085 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1086 VM_WIMG_USE_DEFAULT, FALSE);
1087
1088 if (kr != KERN_SUCCESS) {
1089 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1090 (unsigned long)vaddr, phys_page);
1091 }
1092
1093 /*
1094 * Account for newly stolen memory
1095 */
1096 vm_page_wire_count++;
1097 vm_page_stolen_count++;
1098 }
1099
1100 #if KASAN
1101 kasan_notify_address(round_page(addr), size);
1102 #endif
1103 return (void *) addr;
1104 }
1105
1106 #if CONFIG_SECLUDED_MEMORY
1107 /* boot-args to control secluded memory */
1108 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1109 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1110 int secluded_for_apps = 1; /* apps can use secluded memory */
1111 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1112 #if 11
1113 int secluded_for_fbdp = 0;
1114 #endif
1115 #endif /* CONFIG_SECLUDED_MEMORY */
1116
1117
1118 #if defined(__arm__) || defined(__arm64__)
1119 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1120 unsigned int vm_first_phys_ppnum = 0;
1121 #endif
1122
1123
1124 void vm_page_release_startup(vm_page_t mem);
1125 void
1126 pmap_startup(
1127 vm_offset_t *startp,
1128 vm_offset_t *endp)
1129 {
1130 unsigned int i, npages, pages_initialized, fill, fillval;
1131 ppnum_t phys_page;
1132 addr64_t tmpaddr;
1133
1134 #if defined(__LP64__)
1135 /*
1136 * make sure we are aligned on a 64 byte boundary
1137 * for VM_PAGE_PACK_PTR (it clips off the low-order
1138 * 6 bits of the pointer)
1139 */
1140 if (virtual_space_start != virtual_space_end)
1141 virtual_space_start = round_page(virtual_space_start);
1142 #endif
1143
1144 /*
1145 * We calculate how many page frames we will have
1146 * and then allocate the page structures in one chunk.
1147 */
1148
1149 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1150 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1151 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages fit, leaving room for a vm_page_t to describe each one */
1152
1153 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
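/*
 * Sizing example with assumed numbers: for 4 KB pages and, say, an
 * 80-byte struct vm_page, 1 GB of remaining memory gives
 * npages ~= 2^30 / (4096 + 80) ~= 257000, i.e. enough memory is
 * reserved that both the vm_page structures and the pages they
 * describe fit in what is left.
 */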
1154
1155 /*
1156 * Initialize the page frames.
1157 */
1158 kernel_debug_string_early("Initialize the page frames");
1159
1160 vm_page_array_beginning_addr = &vm_pages[0];
1161 vm_page_array_ending_addr = &vm_pages[npages];
1162
1163 for (i = 0, pages_initialized = 0; i < npages; i++) {
1164 if (!pmap_next_page(&phys_page))
1165 break;
1166 #if defined(__arm__) || defined(__arm64__)
1167 if (pages_initialized == 0) {
1168 vm_first_phys_ppnum = phys_page;
1169 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1170 }
1171 assert((i + vm_first_phys_ppnum) == phys_page);
1172 #endif
1173 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1174 vm_page_lowest = phys_page;
1175
1176 vm_page_init(&vm_pages[i], phys_page, FALSE);
1177 vm_page_pages++;
1178 pages_initialized++;
1179 }
1180 vm_pages_count = pages_initialized;
1181 vm_page_array_boundary = &vm_pages[pages_initialized];
1182
1183 #if defined(__LP64__)
1184
1185 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1186 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1187
1188 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1189 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1190 #endif
1191 kernel_debug_string_early("page fill/release");
1192 /*
1193 * Check if we want to initialize pages to a known value
1194 */
1195 fill = 0; /* Assume no fill */
1196 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1197 #if DEBUG
1198 /* This slows down booting the DEBUG kernel, particularly on
1199 * large memory systems, but is worthwhile in deterministically
1200 * trapping uninitialized memory usage.
1201 */
1202 if (fill == 0) {
1203 fill = 1;
1204 fillval = 0xDEB8F177;
1205 }
1206 #endif
1207 if (fill)
1208 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1209
1210 #if CONFIG_SECLUDED_MEMORY
1211 /* default: no secluded mem */
1212 secluded_mem_mb = 0;
1213 if (max_mem > 1*1024*1024*1024) {
1214 /* default to 90MB for devices with > 1GB of RAM */
1215 secluded_mem_mb = 90;
1216 }
1217 /* override with value from device tree, if provided */
1218 PE_get_default("kern.secluded_mem_mb",
1219 &secluded_mem_mb, sizeof(secluded_mem_mb));
1220 /* override with value from boot-args, if provided */
1221 PE_parse_boot_argn("secluded_mem_mb",
1222 &secluded_mem_mb,
1223 sizeof (secluded_mem_mb));
1224
1225 vm_page_secluded_target = (unsigned int)
1226 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1227 PE_parse_boot_argn("secluded_for_iokit",
1228 &secluded_for_iokit,
1229 sizeof (secluded_for_iokit));
1230 PE_parse_boot_argn("secluded_for_apps",
1231 &secluded_for_apps,
1232 sizeof (secluded_for_apps));
1233 PE_parse_boot_argn("secluded_for_filecache",
1234 &secluded_for_filecache,
1235 sizeof (secluded_for_filecache));
1236 #if 11
1237 PE_parse_boot_argn("secluded_for_fbdp",
1238 &secluded_for_fbdp,
1239 sizeof (secluded_for_fbdp));
1240 #endif
1241 #endif /* CONFIG_SECLUDED_MEMORY */
1242
1243 // -debug code remove
1244 if (2 == vm_himemory_mode) {
1245 // free low -> high so high is preferred
1246 for (i = 1; i <= pages_initialized; i++) {
1247 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1248 vm_page_release_startup(&vm_pages[i - 1]);
1249 }
1250 }
1251 else
1252 // debug code remove-
1253
1254 /*
1255 * Release pages in reverse order so that physical pages
1256 * initially get allocated in ascending addresses. This keeps
1257 * the devices (which must address physical memory) happy if
1258 * they require several consecutive pages.
1259 */
1260 for (i = pages_initialized; i > 0; i--) {
1261 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1262 vm_page_release_startup(&vm_pages[i - 1]);
1263 }
1264
1265 VM_CHECK_MEMORYSTATUS;
1266
1267 #if 0
1268 {
1269 vm_page_t xx, xxo, xxl;
1270 int i, j, k, l;
1271
1272 j = 0; /* (BRINGUP) */
1273 xxl = 0;
1274
1275 for( i = 0; i < vm_colors; i++ ) {
1276 queue_iterate(&vm_page_queue_free[i].qhead,
1277 xx,
1278 vm_page_t,
1279 pageq) { /* BRINGUP */
1280 j++; /* (BRINGUP) */
1281 if(j > vm_page_free_count) { /* (BRINGUP) */
1282 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1283 }
1284
1285 l = vm_page_free_count - j; /* (BRINGUP) */
1286 k = 0; /* (BRINGUP) */
1287
1288 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1289
1290 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1291 k++;
1292 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1293 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1294 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1295 }
1296 }
1297
1298 xxl = xx;
1299 }
1300 }
1301
1302 if(j != vm_page_free_count) { /* (BRINGUP) */
1303 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1304 }
1305 }
1306 #endif
1307
1308
1309 /*
1310 * We have to re-align virtual_space_start,
1311 * because pmap_steal_memory has been using it.
1312 */
1313
1314 virtual_space_start = round_page(virtual_space_start);
1315
1316 *startp = virtual_space_start;
1317 *endp = virtual_space_end;
1318 }
1319 #endif /* MACHINE_PAGES */
1320
1321 /*
1322 * Routine: vm_page_module_init
1323 * Purpose:
1324 * Second initialization pass, to be done after
1325 * the basic VM system is ready.
1326 */
1327 void
1328 vm_page_module_init(void)
1329 {
1330 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1331 vm_size_t vm_page_with_ppnum_size;
1332
1333 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1334 0, PAGE_SIZE, "vm pages array");
1335
1336 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1337 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1338 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1339 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1340 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1341 /*
1342 * Adjust zone statistics to account for the real pages allocated
1343 * in vm_page_create(). [Q: is this really what we want?]
1344 */
1345 vm_page_array_zone->count += vm_page_pages;
1346 vm_page_array_zone->sum_count += vm_page_pages;
1347 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1348 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1349 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1350 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1351 /* since zone accounts for these, take them out of stolen */
1352 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1353
1354 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1355
1356 vm_page_zone = zinit(vm_page_with_ppnum_size,
1357 0, PAGE_SIZE, "vm pages");
1358
1359 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1360 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1361 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1362 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1363 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1364 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1365 }
1366
1367 /*
1368 * Routine: vm_page_create
1369 * Purpose:
1370 * After the VM system is up, machine-dependent code
1371 * may stumble across more physical memory, such as
1372 * memory that it was reserving for a frame buffer.
1373 * vm_page_create turns this memory into available pages.
1374 */
1375
1376 void
1377 vm_page_create(
1378 ppnum_t start,
1379 ppnum_t end)
1380 {
1381 ppnum_t phys_page;
1382 vm_page_t m;
1383
1384 for (phys_page = start;
1385 phys_page < end;
1386 phys_page++) {
1387 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1388 == VM_PAGE_NULL)
1389 vm_page_more_fictitious();
1390
1391 m->fictitious = FALSE;
1392 pmap_clear_noencrypt(phys_page);
1393
1394 vm_page_pages++;
1395 vm_page_release(m, FALSE);
1396 }
1397 }
1398
1399 /*
1400 * vm_page_hash:
1401 *
1402 * Distributes the object/offset key pair among hash buckets.
1403 *
1404 * NOTE: The bucket count must be a power of 2
1405 */
1406 #define vm_page_hash(object, offset) (\
1407 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1408 & vm_page_hash_mask)
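/*
 * Typical use, as in vm_page_insert_internal() and vm_page_remove()
 * below: hash the pair, then pick the bucket and the spin lock that
 * covers it before touching the bucket's packed page_list.
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */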
1409
1410
1411 /*
1412 * vm_page_insert: [ internal use only ]
1413 *
1414 * Inserts the given mem entry into the object/object-page
1415 * table and object list.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_insert(
1421 vm_page_t mem,
1422 vm_object_t object,
1423 vm_object_offset_t offset)
1424 {
1425 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1426 }
1427
1428 void
1429 vm_page_insert_wired(
1430 vm_page_t mem,
1431 vm_object_t object,
1432 vm_object_offset_t offset,
1433 vm_tag_t tag)
1434 {
1435 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1436 }
1437
1438 void
1439 vm_page_insert_internal(
1440 vm_page_t mem,
1441 vm_object_t object,
1442 vm_object_offset_t offset,
1443 vm_tag_t tag,
1444 boolean_t queues_lock_held,
1445 boolean_t insert_in_hash,
1446 boolean_t batch_pmap_op,
1447 boolean_t batch_accounting,
1448 uint64_t *delayed_ledger_update)
1449 {
1450 vm_page_bucket_t *bucket;
1451 lck_spin_t *bucket_lock;
1452 int hash_id;
1453 task_t owner;
1454
1455 XPR(XPR_VM_PAGE,
1456 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1457 object, offset, mem, 0,0);
1458 #if 0
1459 /*
1460 * we may not hold the page queue lock
1461 * so this check isn't safe to make
1462 */
1463 VM_PAGE_CHECK(mem);
1464 #endif
1465
1466 assert(page_aligned(offset));
1467
1468 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1469
1470 /* the vm_submap_object is only a placeholder for submaps */
1471 assert(object != vm_submap_object);
1472
1473 vm_object_lock_assert_exclusive(object);
1474 LCK_MTX_ASSERT(&vm_page_queue_lock,
1475 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1476 : LCK_MTX_ASSERT_NOTOWNED);
1477
1478 if (queues_lock_held == FALSE)
1479 assert(!VM_PAGE_PAGEABLE(mem));
1480
1481 if (insert_in_hash == TRUE) {
1482 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1483 if (mem->tabled || mem->vm_page_object)
1484 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1485 "already in (obj=%p,off=0x%llx)",
1486 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1487 #endif
1488 if (object->internal && (offset >= object->vo_size)) {
1489 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1490 mem, object, offset, object->vo_size);
1491 }
1492
1493 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1494
1495 /*
1496 * Record the object/offset pair in this page
1497 */
1498
1499 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1500 mem->offset = offset;
1501
1502 #if CONFIG_SECLUDED_MEMORY
1503 if (object->eligible_for_secluded) {
1504 vm_page_secluded.eligible_for_secluded++;
1505 }
1506 #endif /* CONFIG_SECLUDED_MEMORY */
1507
1508 /*
1509 * Insert it into the object/offset hash table
1510 */
1511 hash_id = vm_page_hash(object, offset);
1512 bucket = &vm_page_buckets[hash_id];
1513 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1514
1515 lck_spin_lock(bucket_lock);
1516
1517 mem->next_m = bucket->page_list;
1518 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1519 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1520
1521 #if MACH_PAGE_HASH_STATS
1522 if (++bucket->cur_count > bucket->hi_count)
1523 bucket->hi_count = bucket->cur_count;
1524 #endif /* MACH_PAGE_HASH_STATS */
1525 mem->hashed = TRUE;
1526 lck_spin_unlock(bucket_lock);
1527 }
1528
1529 {
1530 unsigned int cache_attr;
1531
1532 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1533
1534 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1535 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1536 }
1537 }
1538 /*
1539 * Now link into the object's list of backed pages.
1540 */
1541 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
1542 object->memq_hint = mem;
1543 mem->tabled = TRUE;
1544
1545 /*
1546 * Show that the object has one more resident page.
1547 */
1548
1549 object->resident_page_count++;
1550 if (VM_PAGE_WIRED(mem)) {
1551 assert(mem->wire_count > 0);
1552 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1553 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1554 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1555 }
1556 assert(object->resident_page_count >= object->wired_page_count);
1557
1558 if (batch_accounting == FALSE) {
1559 if (object->internal) {
1560 OSAddAtomic(1, &vm_page_internal_count);
1561 } else {
1562 OSAddAtomic(1, &vm_page_external_count);
1563 }
1564 }
1565
1566 /*
1567 * It wouldn't make sense to insert a "reusable" page in
1568 * an object (the page would have been marked "reusable" only
1569 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1570 * in the object at that time).
1571 * But a page could be inserted in an "all_reusable" object, if
1572 * something faults it in (a vm_read() from another task or a
1573 * "use-after-free" issue in user space, for example). It can
1574 * also happen if we're relocating a page from that object to
1575 * a different physical page during a physically-contiguous
1576 * allocation.
1577 */
1578 assert(!mem->reusable);
1579 if (object->all_reusable) {
1580 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1581 }
1582
1583 if (object->purgable == VM_PURGABLE_DENY) {
1584 owner = TASK_NULL;
1585 } else {
1586 owner = object->vo_purgeable_owner;
1587 }
1588 if (owner &&
1589 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1590 VM_PAGE_WIRED(mem))) {
1591
1592 if (delayed_ledger_update)
1593 *delayed_ledger_update += PAGE_SIZE;
1594 else {
1595 /* more non-volatile bytes */
1596 ledger_credit(owner->ledger,
1597 task_ledgers.purgeable_nonvolatile,
1598 PAGE_SIZE);
1599 /* more footprint */
1600 ledger_credit(owner->ledger,
1601 task_ledgers.phys_footprint,
1602 PAGE_SIZE);
1603 }
1604
1605 } else if (owner &&
1606 (object->purgable == VM_PURGABLE_VOLATILE ||
1607 object->purgable == VM_PURGABLE_EMPTY)) {
1608 assert(! VM_PAGE_WIRED(mem));
1609 /* more volatile bytes */
1610 ledger_credit(owner->ledger,
1611 task_ledgers.purgeable_volatile,
1612 PAGE_SIZE);
1613 }
1614
1615 if (object->purgable == VM_PURGABLE_VOLATILE) {
1616 if (VM_PAGE_WIRED(mem)) {
1617 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1618 } else {
1619 OSAddAtomic(+1, &vm_page_purgeable_count);
1620 }
1621 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1622 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
1623 /*
1624 * This page belongs to a purged VM object but hasn't
1625 * been purged (because it was "busy").
1626 * It's in the "throttled" queue and hence not
1627 * visible to vm_pageout_scan(). Move it to a pageable
1628 * queue, so that it can eventually be reclaimed, instead
1629 * of lingering in the "empty" object.
1630 */
1631 if (queues_lock_held == FALSE)
1632 vm_page_lockspin_queues();
1633 vm_page_deactivate(mem);
1634 if (queues_lock_held == FALSE)
1635 vm_page_unlock_queues();
1636 }
1637
1638 #if VM_OBJECT_TRACKING_OP_MODIFIED
1639 if (vm_object_tracking_inited &&
1640 object->internal &&
1641 object->resident_page_count == 0 &&
1642 object->pager == NULL &&
1643 object->shadow != NULL &&
1644 object->shadow->copy == object) {
1645 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1646 int numsaved = 0;
1647
1648 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1649 btlog_add_entry(vm_object_tracking_btlog,
1650 object,
1651 VM_OBJECT_TRACKING_OP_MODIFIED,
1652 bt,
1653 numsaved);
1654 }
1655 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1656 }
1657
1658 /*
1659 * vm_page_replace:
1660 *
1661 * Exactly like vm_page_insert, except that we first
1662 * remove any existing page at the given offset in object.
1663 *
1664 * The object must be locked.
1665 */
1666 void
1667 vm_page_replace(
1668 vm_page_t mem,
1669 vm_object_t object,
1670 vm_object_offset_t offset)
1671 {
1672 vm_page_bucket_t *bucket;
1673 vm_page_t found_m = VM_PAGE_NULL;
1674 lck_spin_t *bucket_lock;
1675 int hash_id;
1676
1677 #if 0
1678 /*
1679 * we don't hold the page queue lock
1680 * so this check isn't safe to make
1681 */
1682 VM_PAGE_CHECK(mem);
1683 #endif
1684 vm_object_lock_assert_exclusive(object);
1685 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1686 if (mem->tabled || mem->vm_page_object)
1687 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1688 "already in (obj=%p,off=0x%llx)",
1689 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1690 #endif
1691 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1692
1693 assert(!VM_PAGE_PAGEABLE(mem));
1694
1695 /*
1696 * Record the object/offset pair in this page
1697 */
1698 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1699 mem->offset = offset;
1700
1701 /*
1702 * Insert it into the object/offset hash table,
1703 * replacing any page that might have been there.
1704 */
1705
1706 hash_id = vm_page_hash(object, offset);
1707 bucket = &vm_page_buckets[hash_id];
1708 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1709
1710 lck_spin_lock(bucket_lock);
1711
1712 if (bucket->page_list) {
1713 vm_page_packed_t *mp = &bucket->page_list;
1714 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1715
1716 do {
1717 /*
1718 * compare packed object pointers
1719 */
1720 if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
1721 /*
1722 * Remove old page from hash list
1723 */
1724 *mp = m->next_m;
1725 m->hashed = FALSE;
1726 m->next_m = VM_PAGE_PACK_PTR(NULL);
1727
1728 found_m = m;
1729 break;
1730 }
1731 mp = &m->next_m;
1732 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1733
1734 mem->next_m = bucket->page_list;
1735 } else {
1736 mem->next_m = VM_PAGE_PACK_PTR(NULL);
1737 }
1738 /*
1739 * insert new page at head of hash list
1740 */
1741 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1742 mem->hashed = TRUE;
1743
1744 lck_spin_unlock(bucket_lock);
1745
1746 if (found_m) {
1747 /*
1748 * there was already a page at the specified
1749 * offset for this object... remove it from
1750 * the object and free it back to the free list
1751 */
1752 vm_page_free_unlocked(found_m, FALSE);
1753 }
1754 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1755 }
1756
1757 /*
1758 * vm_page_remove: [ internal use only ]
1759 *
1760 * Removes the given mem entry from the object/offset-page
1761 * table and the object page list.
1762 *
1763 * The object must be locked.
1764 */
1765
1766 void
1767 vm_page_remove(
1768 vm_page_t mem,
1769 boolean_t remove_from_hash)
1770 {
1771 vm_page_bucket_t *bucket;
1772 vm_page_t this;
1773 lck_spin_t *bucket_lock;
1774 int hash_id;
1775 task_t owner;
1776 vm_object_t m_object;
1777
1778 m_object = VM_PAGE_OBJECT(mem);
1779
1780 XPR(XPR_VM_PAGE,
1781 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1782 m_object, mem->offset,
1783 mem, 0,0);
1784
1785 vm_object_lock_assert_exclusive(m_object);
1786 assert(mem->tabled);
1787 assert(!mem->cleaning);
1788 assert(!mem->laundry);
1789
1790 if (VM_PAGE_PAGEABLE(mem)) {
1791 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1792 }
1793 #if 0
1794 /*
1795 * we don't hold the page queue lock
1796 * so this check isn't safe to make
1797 */
1798 VM_PAGE_CHECK(mem);
1799 #endif
1800 if (remove_from_hash == TRUE) {
1801 /*
1802 * Remove from the object/offset hash table
1803 */
1804 hash_id = vm_page_hash(m_object, mem->offset);
1805 bucket = &vm_page_buckets[hash_id];
1806 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1807
1808 lck_spin_lock(bucket_lock);
1809
1810 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1811 /* optimize for common case */
1812
1813 bucket->page_list = mem->next_m;
1814 } else {
1815 vm_page_packed_t *prev;
1816
1817 for (prev = &this->next_m;
1818 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1819 prev = &this->next_m)
1820 continue;
1821 *prev = this->next_m;
1822 }
1823 #if MACH_PAGE_HASH_STATS
1824 bucket->cur_count--;
1825 #endif /* MACH_PAGE_HASH_STATS */
1826 mem->hashed = FALSE;
1827 this->next_m = VM_PAGE_PACK_PTR(NULL);
1828 lck_spin_unlock(bucket_lock);
1829 }
1830 /*
1831 * Now remove from the object's list of backed pages.
1832 */
1833
1834 vm_page_remove_internal(mem);
1835
1836 /*
1837 * And show that the object has one fewer resident
1838 * page.
1839 */
1840
1841 assert(m_object->resident_page_count > 0);
1842 m_object->resident_page_count--;
1843
1844 if (m_object->internal) {
1845 #if DEBUG
1846 assert(vm_page_internal_count);
1847 #endif /* DEBUG */
1848
1849 OSAddAtomic(-1, &vm_page_internal_count);
1850 } else {
1851 assert(vm_page_external_count);
1852 OSAddAtomic(-1, &vm_page_external_count);
1853
1854 if (mem->xpmapped) {
1855 assert(vm_page_xpmapped_external_count);
1856 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1857 }
1858 }
1859 if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
1860 if (m_object->resident_page_count == 0)
1861 vm_object_cache_remove(m_object);
1862 }
1863
1864 if (VM_PAGE_WIRED(mem)) {
1865 assert(mem->wire_count > 0);
1866 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1867 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1868 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1869 }
1870 assert(m_object->resident_page_count >=
1871 m_object->wired_page_count);
1872 if (mem->reusable) {
1873 assert(m_object->reusable_page_count > 0);
1874 m_object->reusable_page_count--;
1875 assert(m_object->reusable_page_count <=
1876 m_object->resident_page_count);
1877 mem->reusable = FALSE;
1878 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1879 vm_page_stats_reusable.reused_remove++;
1880 } else if (m_object->all_reusable) {
1881 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1882 vm_page_stats_reusable.reused_remove++;
1883 }
1884
1885 if (m_object->purgable == VM_PURGABLE_DENY) {
1886 owner = TASK_NULL;
1887 } else {
1888 owner = m_object->vo_purgeable_owner;
1889 }
1890 if (owner &&
1891 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1892 VM_PAGE_WIRED(mem))) {
1893 /* less non-volatile bytes */
1894 ledger_debit(owner->ledger,
1895 task_ledgers.purgeable_nonvolatile,
1896 PAGE_SIZE);
1897 /* less footprint */
1898 ledger_debit(owner->ledger,
1899 task_ledgers.phys_footprint,
1900 PAGE_SIZE);
1901 } else if (owner &&
1902 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1903 m_object->purgable == VM_PURGABLE_EMPTY)) {
1904 assert(! VM_PAGE_WIRED(mem));
1905 /* less volatile bytes */
1906 ledger_debit(owner->ledger,
1907 task_ledgers.purgeable_volatile,
1908 PAGE_SIZE);
1909 }
1910 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1911 if (VM_PAGE_WIRED(mem)) {
1912 assert(vm_page_purgeable_wired_count > 0);
1913 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1914 } else {
1915 assert(vm_page_purgeable_count > 0);
1916 OSAddAtomic(-1, &vm_page_purgeable_count);
1917 }
1918 }
1919
1920 if (m_object->set_cache_attr == TRUE)
1921 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1922
1923 mem->tabled = FALSE;
1924 mem->vm_page_object = 0;
1925 mem->offset = (vm_object_offset_t) -1;
1926 }
1927
1928
1929 /*
1930 * vm_page_lookup:
1931 *
1932 * Returns the page associated with the object/offset
1933 * pair specified; if none is found, VM_PAGE_NULL is returned.
1934 *
1935 * The object must be locked. No side effects.
1936 */
1937
1938 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1939
1940 #if DEBUG_VM_PAGE_LOOKUP
1941
1942 struct {
1943 uint64_t vpl_total;
1944 uint64_t vpl_empty_obj;
1945 uint64_t vpl_bucket_NULL;
1946 uint64_t vpl_hit_hint;
1947 uint64_t vpl_hit_hint_next;
1948 uint64_t vpl_hit_hint_prev;
1949 uint64_t vpl_fast;
1950 uint64_t vpl_slow;
1951 uint64_t vpl_hit;
1952 uint64_t vpl_miss;
1953
1954 uint64_t vpl_fast_elapsed;
1955 uint64_t vpl_slow_elapsed;
1956 } vm_page_lookup_stats __attribute__((aligned(8)));
1957
1958 #endif
1959
1960 #define KDP_VM_PAGE_WALK_MAX 1000
1961
1962 vm_page_t
1963 kdp_vm_page_lookup(
1964 vm_object_t object,
1965 vm_object_offset_t offset)
1966 {
1967 vm_page_t cur_page;
1968 int num_traversed = 0;
1969
1970 if (not_in_kdp) {
1971 panic("kdp_vm_page_lookup done outside of kernel debugger");
1972 }
1973
1974 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1975 if (cur_page->offset == offset) {
1976 return cur_page;
1977 }
1978 num_traversed++;
1979
1980 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1981 return VM_PAGE_NULL;
1982 }
1983 }
1984
1985 return VM_PAGE_NULL;
1986 }
1987
1988 vm_page_t
1989 vm_page_lookup(
1990 vm_object_t object,
1991 vm_object_offset_t offset)
1992 {
1993 vm_page_t mem;
1994 vm_page_bucket_t *bucket;
1995 vm_page_queue_entry_t qe;
1996 lck_spin_t *bucket_lock = NULL;
1997 int hash_id;
1998 #if DEBUG_VM_PAGE_LOOKUP
1999 uint64_t start, elapsed;
2000
2001 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2002 #endif
2003 vm_object_lock_assert_held(object);
2004
2005 if (object->resident_page_count == 0) {
2006 #if DEBUG_VM_PAGE_LOOKUP
2007 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2008 #endif
2009 return (VM_PAGE_NULL);
2010 }
2011
2012 mem = object->memq_hint;
2013
2014 if (mem != VM_PAGE_NULL) {
2015 assert(VM_PAGE_OBJECT(mem) == object);
2016
2017 if (mem->offset == offset) {
2018 #if DEBUG_VM_PAGE_LOOKUP
2019 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2020 #endif
2021 return (mem);
2022 }
2023 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
2024
2025 if (! vm_page_queue_end(&object->memq, qe)) {
2026 vm_page_t next_page;
2027
2028 next_page = (vm_page_t)((uintptr_t)qe);
2029 assert(VM_PAGE_OBJECT(next_page) == object);
2030
2031 if (next_page->offset == offset) {
2032 object->memq_hint = next_page; /* new hint */
2033 #if DEBUG_VM_PAGE_LOOKUP
2034 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2035 #endif
2036 return (next_page);
2037 }
2038 }
2039 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
2040
2041 if (! vm_page_queue_end(&object->memq, qe)) {
2042 vm_page_t prev_page;
2043
2044 prev_page = (vm_page_t)((uintptr_t)qe);
2045 assert(VM_PAGE_OBJECT(prev_page) == object);
2046
2047 if (prev_page->offset == offset) {
2048 object->memq_hint = prev_page; /* new hint */
2049 #if DEBUG_VM_PAGE_LOOKUP
2050 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2051 #endif
2052 return (prev_page);
2053 }
2054 }
2055 }
2056 /*
2057 * Search the hash table for this object/offset pair
2058 */
2059 hash_id = vm_page_hash(object, offset);
2060 bucket = &vm_page_buckets[hash_id];
2061
2062 /*
2063 * since we hold the object lock, we are guaranteed that no
2064 * new pages can be inserted into this object... this in turn
2065 * guarantees that the page we're looking for can't exist
2066 * if the bucket it hashes to is currently NULL even when looked
2067 * at outside the scope of the hash bucket lock... this is a
2068 * really cheap optimization to avoid taking the lock
2069 */
2070 if (!bucket->page_list) {
2071 #if DEBUG_VM_PAGE_LOOKUP
2072 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2073 #endif
2074 return (VM_PAGE_NULL);
2075 }
2076
2077 #if DEBUG_VM_PAGE_LOOKUP
2078 start = mach_absolute_time();
2079 #endif
2080 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2081 /*
2082 * on average, it's roughly 3 times faster to run a short memq list
2083 * than to take the spin lock and go through the hash list
2084 */
2085 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2086
2087 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2088
2089 if (mem->offset == offset)
2090 break;
2091
2092 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
2093 }
2094 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2095 mem = NULL;
2096 } else {
2097 vm_page_object_t packed_object;
2098
2099 packed_object = VM_PAGE_PACK_OBJECT(object);
2100
2101 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2102
2103 lck_spin_lock(bucket_lock);
2104
2105 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2106 mem != VM_PAGE_NULL;
2107 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
2108 #if 0
2109 /*
2110 * we don't hold the page queue lock
2111 * so this check isn't safe to make
2112 */
2113 VM_PAGE_CHECK(mem);
2114 #endif
2115 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
2116 break;
2117 }
2118 lck_spin_unlock(bucket_lock);
2119 }
2120
2121 #if DEBUG_VM_PAGE_LOOKUP
2122 elapsed = mach_absolute_time() - start;
2123
2124 if (bucket_lock) {
2125 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2126 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2127 } else {
2128 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2129 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2130 }
2131 if (mem != VM_PAGE_NULL)
2132 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2133 else
2134 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2135 #endif
2136 if (mem != VM_PAGE_NULL) {
2137 assert(VM_PAGE_OBJECT(mem) == object);
2138
2139 object->memq_hint = mem;
2140 }
2141 return (mem);
2142 }
2143
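/*
 * Illustrative sketch of a typical vm_page_lookup() caller: a hypothetical
 * helper that simply reports whether a page is resident at "offset".
 * It assumes the caller already holds the object lock, as required above.
 */
#if 0
static boolean_t
example_page_is_resident(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_object_lock_assert_held(object);

	return (vm_page_lookup(object, offset) != VM_PAGE_NULL);
}
#endif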
2144
2145 /*
2146 * vm_page_rename:
2147 *
2148 * Move the given memory entry from its
2149 * current object to the specified target object/offset.
2150 *
2151 * Both the original and the new object must be locked.
2152 */
2153 void
2154 vm_page_rename(
2155 vm_page_t mem,
2156 vm_object_t new_object,
2157 vm_object_offset_t new_offset)
2158 {
2159 boolean_t internal_to_external, external_to_internal;
2160 vm_tag_t tag;
2161 vm_object_t m_object;
2162
2163 m_object = VM_PAGE_OBJECT(mem);
2164
2165 assert(m_object != new_object);
2166 assert(m_object);
2167
2168 XPR(XPR_VM_PAGE,
2169 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2170 new_object, new_offset,
2171 mem, 0,0);
2172
2173 /*
2174 * Changes to mem->object require the page queues lock because
2175 * the pageout daemon uses that lock to get the object.
2176 */
2177 vm_page_lockspin_queues();
2178
2179 internal_to_external = FALSE;
2180 external_to_internal = FALSE;
2181
2182 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2183 /*
2184 * it's much easier to get the vm_page_pageable_xxx accounting correct
2185 * if we first move the page to the active queue... it's going to end
2186 * up there anyway, and we don't call vm_page_rename() frequently enough
2187 * for this to matter.
2188 */
2189 vm_page_queues_remove(mem, FALSE);
2190 vm_page_activate(mem);
2191 }
2192 if (VM_PAGE_PAGEABLE(mem)) {
2193 if (m_object->internal && !new_object->internal) {
2194 internal_to_external = TRUE;
2195 }
2196 if (!m_object->internal && new_object->internal) {
2197 external_to_internal = TRUE;
2198 }
2199 }
2200
2201 tag = m_object->wire_tag;
2202 vm_page_remove(mem, TRUE);
2203 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2204
2205 if (internal_to_external) {
2206 vm_page_pageable_internal_count--;
2207 vm_page_pageable_external_count++;
2208 } else if (external_to_internal) {
2209 vm_page_pageable_external_count--;
2210 vm_page_pageable_internal_count++;
2211 }
2212
2213 vm_page_unlock_queues();
2214 }
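
/*
 * Illustrative sketch of a vm_page_rename() caller: a hypothetical helper
 * that moves a resident page into another object, assuming both the source
 * and destination objects are locked exclusively.
 */
#if 0
static void
example_move_page(
	vm_page_t		m,
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset)
{
	vm_object_lock_assert_exclusive(VM_PAGE_OBJECT(m));
	vm_object_lock_assert_exclusive(dst_object);

	vm_page_rename(m, dst_object, dst_offset);
}
#endif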
2215
2216 /*
2217 * vm_page_init:
2218 *
2219 * Initialize the fields in a new page.
2220 * This takes a structure with random values and initializes it
2221 * so that it can be given to vm_page_release or vm_page_insert.
2222 */
2223 void
2224 vm_page_init(
2225 vm_page_t mem,
2226 ppnum_t phys_page,
2227 boolean_t lopage)
2228 {
2229 assert(phys_page);
2230
2231 #if DEBUG
2232 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2233 if (!(pmap_valid_page(phys_page))) {
2234 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2235 }
2236 }
2237 #endif
2238 *mem = vm_page_template;
2239
2240 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2241 #if 0
2242 /*
2243 * we're leaving this turned off for now... currently pages
2244 * come off the free list and are either immediately dirtied/referenced
2245 * due to zero-fill or COW faults, or are used to read or write files...
2246 * in the file I/O case, the UPL mechanism takes care of clearing
2247 * the state of the HW ref/mod bits in a somewhat fragile way.
2248 * Since we may change the way this works in the future (to toughen it up),
2249 * I'm leaving this as a reminder of where these bits could get cleared
2250 */
2251
2252 /*
2253 * make sure both the h/w referenced and modified bits are
2254 * clear at this point... we are especially dependent on
2255 * not finding a 'stale' h/w modified in a number of spots
2256 * once this page goes back into use
2257 */
2258 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2259 #endif
2260 mem->lopage = lopage;
2261 }
2262
2263 /*
2264 * vm_page_grab_fictitious:
2265 *
2266 * Remove a fictitious page from the free list.
2267 * Returns VM_PAGE_NULL if there are no free pages.
2268 */
2269 int c_vm_page_grab_fictitious = 0;
2270 int c_vm_page_grab_fictitious_failed = 0;
2271 int c_vm_page_release_fictitious = 0;
2272 int c_vm_page_more_fictitious = 0;
2273
2274 vm_page_t
2275 vm_page_grab_fictitious_common(
2276 ppnum_t phys_addr)
2277 {
2278 vm_page_t m;
2279
2280 if ((m = (vm_page_t)zget(vm_page_zone))) {
2281
2282 vm_page_init(m, phys_addr, FALSE);
2283 m->fictitious = TRUE;
2284
2285 c_vm_page_grab_fictitious++;
2286 } else
2287 c_vm_page_grab_fictitious_failed++;
2288
2289 return m;
2290 }
2291
2292 vm_page_t
2293 vm_page_grab_fictitious(void)
2294 {
2295 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2296 }
2297
2298 int vm_guard_count;
2299
2300
2301 vm_page_t
2302 vm_page_grab_guard(void)
2303 {
2304 vm_page_t page;
2305 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2306 if (page) OSAddAtomic(1, &vm_guard_count);
2307 return page;
2308 }
2309
2310
2311 /*
2312 * vm_page_release_fictitious:
2313 *
2314 * Release a fictitious page to the zone pool
2315 */
2316 void
2317 vm_page_release_fictitious(
2318 vm_page_t m)
2319 {
2320 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
2321 assert(m->fictitious);
2322 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2323 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2324
2325
2326 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2327
2328 c_vm_page_release_fictitious++;
2329
2330 zfree(vm_page_zone, m);
2331 }
2332
2333 /*
2334 * vm_page_more_fictitious:
2335 *
2336 * Add more fictitious pages to the zone.
2337 * Allowed to block. This routine is tightly coupled
2338 * to the zones code, for several reasons:
2339 * 1. we need to carve some page structures out of physical
2340 * memory before zones work, so they _cannot_ come from
2341 * the zone_map.
2342 * 2. the zone needs to be collectable in order to prevent
2343 * growth without bound. These structures are used by
2344 * the device pager (by the hundreds and thousands), as
2345 * private pages for pageout, and as blocking pages for
2346 * pagein. Temporary bursts in demand should not result in
2347 * permanent allocation of a resource.
2348 * 3. To smooth allocation humps, we allocate single pages
2349 * with kernel_memory_allocate(), and cram them into the
2350 * zone.
2351 */
2352
2353 void vm_page_more_fictitious(void)
2354 {
2355 vm_offset_t addr;
2356 kern_return_t retval;
2357
2358 c_vm_page_more_fictitious++;
2359
2360 /*
2361 * Allocate a single page from the zone_map. Do not wait if no physical
2362 * pages are immediately available, and do not zero the space. We need
2363 * our own blocking lock here to prevent multiple simultaneous
2364 * requests from piling up on the zone_map lock. Exactly one
2365 * (of our) threads should be potentially waiting on the map lock.
2366 * If the winner is not vm-privileged, then the page allocation will fail,
2367 * and it will temporarily block here in the vm_page_wait().
2368 */
2369 lck_mtx_lock(&vm_page_alloc_lock);
2370 /*
2371 * If another thread allocated space, just bail out now.
2372 */
2373 if (zone_free_count(vm_page_zone) > 5) {
2374 /*
2375 * The number "5" is a small number that is larger than the
2376 * number of fictitious pages that any single caller will
2377 * attempt to allocate. Otherwise, a thread will attempt to
2378 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2379 * release all of the resources and locks already acquired,
2380 * and then call this routine. This routine finds the pages
2381 * that the caller released, so it fails to allocate new space.
2382 * The process repeats infinitely. The largest known number
2383 * of fictitious pages required in this manner is 2. 5 is
2384 * simply a somewhat larger number.
2385 */
2386 lck_mtx_unlock(&vm_page_alloc_lock);
2387 return;
2388 }
2389
2390 retval = kernel_memory_allocate(zone_map,
2391 &addr, PAGE_SIZE, 0,
2392 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2393 if (retval != KERN_SUCCESS) {
2394 /*
2395 * No page was available. Drop the
2396 * lock to give another thread a chance at it, and
2397 * wait for the pageout daemon to make progress.
2398 */
2399 lck_mtx_unlock(&vm_page_alloc_lock);
2400 vm_page_wait(THREAD_UNINT);
2401 return;
2402 }
2403
2404 zcram(vm_page_zone, addr, PAGE_SIZE);
2405
2406 lck_mtx_unlock(&vm_page_alloc_lock);
2407 }
2408
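/*
 * Illustrative sketch of the retry protocol described above: a hypothetical
 * caller that fails to grab a fictitious page releases its own resources,
 * replenishes the zone via vm_page_more_fictitious() (which may block), and
 * tries again.
 */
#if 0
static vm_page_t
example_grab_fictitious_retry(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab_fictitious();
		if (m != VM_PAGE_NULL)
			return m;
		vm_page_more_fictitious();	/* may block while refilling the zone */
	}
}
#endif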
2409
2410 /*
2411 * vm_pool_low():
2412 *
2413 * Return true if it is not likely that a non-vm_privileged thread
2414 * can get memory without blocking. Advisory only, since the
2415 * situation may change under us.
2416 */
2417 int
2418 vm_pool_low(void)
2419 {
2420 /* No locking, at worst we will fib. */
2421 return( vm_page_free_count <= vm_page_free_reserved );
2422 }
2423
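/*
 * Illustrative sketch: because vm_pool_low() is advisory, a hypothetical
 * caller uses it only to defer optional work, never as a guarantee that a
 * later allocation will succeed or fail.
 */
#if 0
static void
example_do_optional_work(void)
{
	if (vm_pool_low()) {
		/* a non-privileged page grab would likely block right now */
		return;
	}
	/* ... best-effort, non-critical page allocations go here ... */
}
#endif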
2424
2425 #if CONFIG_BACKGROUND_QUEUE
2426
2427 void
2428 vm_page_update_background_state(vm_page_t mem)
2429 {
2430 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2431 return;
2432
2433 if (mem->vm_page_in_background == FALSE)
2434 return;
2435
2436 #if BACKGROUNDQ_BASED_ON_QOS
2437 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2438 return;
2439 #else
2440 task_t my_task;
2441
2442 my_task = current_task();
2443
2444 if (my_task) {
2445 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2446 return;
2447 }
2448 #endif
2449 vm_page_lockspin_queues();
2450
2451 mem->vm_page_in_background = FALSE;
2452 vm_page_background_promoted_count++;
2453
2454 vm_page_remove_from_backgroundq(mem);
2455
2456 vm_page_unlock_queues();
2457 }
2458
2459
2460 void
2461 vm_page_assign_background_state(vm_page_t mem)
2462 {
2463 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2464 return;
2465
2466 #if BACKGROUNDQ_BASED_ON_QOS
2467 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2468 mem->vm_page_in_background = TRUE;
2469 else
2470 mem->vm_page_in_background = FALSE;
2471 #else
2472 task_t my_task;
2473
2474 my_task = current_task();
2475
2476 if (my_task)
2477 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2478 #endif
2479 }
2480
2481
2482 void
2483 vm_page_remove_from_backgroundq(
2484 vm_page_t mem)
2485 {
2486 vm_object_t m_object;
2487
2488 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2489
2490 if (mem->vm_page_on_backgroundq) {
2491 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2492
2493 mem->vm_page_backgroundq.next = 0;
2494 mem->vm_page_backgroundq.prev = 0;
2495 mem->vm_page_on_backgroundq = FALSE;
2496
2497 vm_page_background_count--;
2498
2499 m_object = VM_PAGE_OBJECT(mem);
2500
2501 if (m_object->internal)
2502 vm_page_background_internal_count--;
2503 else
2504 vm_page_background_external_count--;
2505 } else {
2506 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2507 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2508 }
2509 }
2510
2511
2512 void
2513 vm_page_add_to_backgroundq(
2514 vm_page_t mem,
2515 boolean_t first)
2516 {
2517 vm_object_t m_object;
2518
2519 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2520
2521 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2522 return;
2523
2524 if (mem->vm_page_on_backgroundq == FALSE) {
2525
2526 m_object = VM_PAGE_OBJECT(mem);
2527
2528 if (vm_page_background_exclude_external && !m_object->internal)
2529 return;
2530
2531 if (first == TRUE)
2532 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2533 else
2534 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2535 mem->vm_page_on_backgroundq = TRUE;
2536
2537 vm_page_background_count++;
2538
2539 if (m_object->internal)
2540 vm_page_background_internal_count++;
2541 else
2542 vm_page_background_external_count++;
2543 }
2544 }
2545
2546 #endif
2547
2548 /*
2549 * this is an interface to support bring-up of drivers
2550 * on platforms with physical memory > 4G...
2551 */
2552 int vm_himemory_mode = 2;
2553
2554
2555 /*
2556 * this interface exists to support hardware controllers
2557 * incapable of generating DMAs with more than 32 bits
2558 * of address on platforms with physical memory > 4G...
2559 */
2560 unsigned int vm_lopages_allocated_q = 0;
2561 unsigned int vm_lopages_allocated_cpm_success = 0;
2562 unsigned int vm_lopages_allocated_cpm_failed = 0;
2563 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2564
2565 vm_page_t
2566 vm_page_grablo(void)
2567 {
2568 vm_page_t mem;
2569
2570 if (vm_lopage_needed == FALSE)
2571 return (vm_page_grab());
2572
2573 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2574
2575 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2576 vm_page_queue_remove_first(&vm_lopage_queue_free,
2577 mem,
2578 vm_page_t,
2579 pageq);
2580 assert(vm_lopage_free_count);
2581 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2582 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2583
2584 vm_lopage_free_count--;
2585 vm_lopages_allocated_q++;
2586
2587 if (vm_lopage_free_count < vm_lopage_lowater)
2588 vm_lopage_refill = TRUE;
2589
2590 lck_mtx_unlock(&vm_page_queue_free_lock);
2591
2592 #if CONFIG_BACKGROUND_QUEUE
2593 vm_page_assign_background_state(mem);
2594 #endif
2595 } else {
2596 lck_mtx_unlock(&vm_page_queue_free_lock);
2597
2598 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2599
2600 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2601 vm_lopages_allocated_cpm_failed++;
2602 lck_mtx_unlock(&vm_page_queue_free_lock);
2603
2604 return (VM_PAGE_NULL);
2605 }
2606 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2607
2608 mem->busy = TRUE;
2609
2610 vm_page_lockspin_queues();
2611
2612 mem->gobbled = FALSE;
2613 vm_page_gobble_count--;
2614 vm_page_wire_count--;
2615
2616 vm_lopages_allocated_cpm_success++;
2617 vm_page_unlock_queues();
2618 }
2619 assert(mem->busy);
2620 assert(!mem->pmapped);
2621 assert(!mem->wpmapped);
2622 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2623
2624 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2625
2626 return (mem);
2627 }
2628
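/*
 * Illustrative sketch: a hypothetical driver path that needs a page
 * addressable by a 32-bit DMA engine grabs it through vm_page_grablo()
 * and reports a shortage if the low-memory pool is exhausted.
 */
#if 0
static kern_return_t
example_grab_dma32_page(vm_page_t *mp)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m == VM_PAGE_NULL)
		return KERN_RESOURCE_SHORTAGE;

	*mp = m;	/* busy, not on any queue */
	return KERN_SUCCESS;
}
#endif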
2629
2630 /*
2631 * vm_page_grab:
2632 *
2633 * first try to grab a page from the per-cpu free list...
2634 * this must be done while pre-emption is disabled... if
2635 * a page is available, we're done...
2636 * if no page is available, grab the vm_page_queue_free_lock
2637 * and see if current number of free pages would allow us
2638 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2639 * if there are pages available, disable preemption and
2640 * recheck the state of the per-cpu free list... we could
2641 * have been preempted and moved to a different cpu, or
2642 * some other thread could have re-filled it... if still
2643 * empty, figure out how many pages we can steal from the
2644 * global free queue and move to the per-cpu queue...
2645 * return 1 of these pages when done... only wake up the
2646 * pageout_scan thread if we moved pages from the global
2647 * list... no need for the wakeup if we've satisfied the
2648 * request from the per-cpu queue.
2649 */
2650
2651 #if CONFIG_SECLUDED_MEMORY
2652 vm_page_t vm_page_grab_secluded(void);
2653 #endif /* CONFIG_SECLUDED_MEMORY */
2654
2655 vm_page_t
2656 vm_page_grab(void)
2657 {
2658 return vm_page_grab_options(0);
2659 }
2660
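/*
 * Illustrative sketch of the common caller pattern for vm_page_grab():
 * a hypothetical helper retries through vm_page_wait() until a page is
 * available, giving up only if the interruptible wait is broken.
 */
#if 0
static vm_page_t
example_grab_page_wait(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			break;		/* interrupted out of the wait */
	}
	return m;
}
#endif
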
2661 #if HIBERNATION
2662 boolean_t hibernate_rebuild_needed = FALSE;
2663 #endif /* HIBERNATION */
2664
2665 vm_page_t
2666 vm_page_grab_options(
2667 int grab_options)
2668 {
2669 vm_page_t mem;
2670
2671 disable_preemption();
2672
2673 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2674 return_page_from_cpu_list:
2675 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2676
2677 #if HIBERNATION
2678 if (hibernate_rebuild_needed) {
2679 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2680 }
2681 #endif /* HIBERNATION */
2682 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2683 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2684
2685 enable_preemption();
2686 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2687 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2688
2689 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2690 assert(mem->tabled == FALSE);
2691 assert(mem->vm_page_object == 0);
2692 assert(!mem->laundry);
2693 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2694 assert(mem->busy);
2695 assert(!mem->pmapped);
2696 assert(!mem->wpmapped);
2697 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2698
2699 #if CONFIG_BACKGROUND_QUEUE
2700 vm_page_assign_background_state(mem);
2701 #endif
2702 return mem;
2703 }
2704 enable_preemption();
2705
2706
2707 /*
2708 * Optionally produce warnings if the wire or gobble
2709 * counts exceed some threshold.
2710 */
2711 #if VM_PAGE_WIRE_COUNT_WARNING
2712 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2713 printf("mk: vm_page_grab(): high wired page count of %d\n",
2714 vm_page_wire_count);
2715 }
2716 #endif
2717 #if VM_PAGE_GOBBLE_COUNT_WARNING
2718 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2719 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2720 vm_page_gobble_count);
2721 }
2722 #endif
2723
2724 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2725
2726 /*
2727 * Only let privileged threads (involved in pageout)
2728 * dip into the reserved pool.
2729 */
2730 if ((vm_page_free_count < vm_page_free_reserved) &&
2731 !(current_thread()->options & TH_OPT_VMPRIV)) {
2732 /* no page for us in the free queue... */
2733 lck_mtx_unlock(&vm_page_queue_free_lock);
2734 mem = VM_PAGE_NULL;
2735
2736 #if CONFIG_SECLUDED_MEMORY
2737 /* ... but can we try and grab from the secluded queue? */
2738 if (vm_page_secluded_count > 0 &&
2739 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2740 task_can_use_secluded_mem(current_task()))) {
2741 mem = vm_page_grab_secluded();
2742 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2743 vm_page_secluded.grab_for_iokit++;
2744 if (mem) {
2745 vm_page_secluded.grab_for_iokit_success++;
2746 }
2747 }
2748 if (mem) {
2749 VM_CHECK_MEMORYSTATUS;
2750 return mem;
2751 }
2752 }
2753 #else /* CONFIG_SECLUDED_MEMORY */
2754 (void) grab_options;
2755 #endif /* CONFIG_SECLUDED_MEMORY */
2756 }
2757 else {
2758 vm_page_t head;
2759 vm_page_t tail;
2760 unsigned int pages_to_steal;
2761 unsigned int color;
2762 unsigned int clump_end, sub_count;
2763
2764 while ( vm_page_free_count == 0 ) {
2765
2766 lck_mtx_unlock(&vm_page_queue_free_lock);
2767 /*
2768 * must be a privileged thread to be
2769 * in this state since a non-privileged
2770 * thread would have bailed if we were
2771 * under the vm_page_free_reserved mark
2772 */
2773 VM_PAGE_WAIT();
2774 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2775 }
2776
2777 disable_preemption();
2778
2779 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2780 lck_mtx_unlock(&vm_page_queue_free_lock);
2781
2782 /*
2783 * we got preempted and moved to another processor
2784 * or we got preempted and someone else ran and filled the cache
2785 */
2786 goto return_page_from_cpu_list;
2787 }
2788 if (vm_page_free_count <= vm_page_free_reserved)
2789 pages_to_steal = 1;
2790 else {
2791 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2792 pages_to_steal = vm_free_magazine_refill_limit;
2793 else
2794 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2795 }
2796 color = PROCESSOR_DATA(current_processor(), start_color);
2797 head = tail = NULL;
2798
2799 vm_page_free_count -= pages_to_steal;
2800 clump_end = sub_count = 0;
2801
2802 while (pages_to_steal--) {
2803
2804 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2805 color = (color + 1) & vm_color_mask;
2806 #if defined(__x86_64__)
2807 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2808 mem,
2809 vm_page_t,
2810 pageq,
2811 clump_end);
2812 #else
2813 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2814 mem,
2815 vm_page_t,
2816 pageq);
2817 #endif
2818
2819 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
2820
2821 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2822
2823 #if defined(__arm__) || defined(__arm64__)
2824 color = (color + 1) & vm_color_mask;
2825 #else
2826
2827 #if DEVELOPMENT || DEBUG
2828
2829 sub_count++;
2830 if (clump_end) {
2831 vm_clump_update_stats(sub_count);
2832 sub_count = 0;
2833 color = (color + 1) & vm_color_mask;
2834 }
2835 #else
2836 if (clump_end) color = (color + 1) & vm_color_mask;
2837
2838 #endif /* if DEVELOPMENT || DEBUG */
2839
2840 #endif /* if defined(__arm__) || defined(__arm64__) */
2841
2842 if (head == NULL)
2843 head = mem;
2844 else
2845 tail->snext = mem;
2846 tail = mem;
2847
2848 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2849 assert(mem->tabled == FALSE);
2850 assert(mem->vm_page_object == 0);
2851 assert(!mem->laundry);
2852
2853 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2854
2855 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2856 assert(mem->busy);
2857 assert(!mem->pmapped);
2858 assert(!mem->wpmapped);
2859 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2860 }
2861 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2862 vm_clump_update_stats(sub_count);
2863 #endif
2864 lck_mtx_unlock(&vm_page_queue_free_lock);
2865
2866 #if HIBERNATION
2867 if (hibernate_rebuild_needed) {
2868 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2869 }
2870 #endif /* HIBERNATION */
2871 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2872 PROCESSOR_DATA(current_processor(), start_color) = color;
2873
2874 /*
2875 * satisfy this request
2876 */
2877 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2878 mem = head;
2879 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2880
2881 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2882 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2883
2884 enable_preemption();
2885 }
2886 /*
2887 * Decide if we should poke the pageout daemon.
2888 * We do this if the free count is less than the low
2889 * water mark, or if the free count is less than the high
2890 * water mark (but above the low water mark) and the inactive
2891 * count is less than its target.
2892 *
2893 * We don't have the counts locked ... if they change a little,
2894 * it doesn't really matter.
2895 */
2896 if ((vm_page_free_count < vm_page_free_min) ||
2897 ((vm_page_free_count < vm_page_free_target) &&
2898 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2899 thread_wakeup((event_t) &vm_page_free_wanted);
2900
2901 VM_CHECK_MEMORYSTATUS;
2902
2903 if (mem) {
2904 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2905
2906 #if CONFIG_BACKGROUND_QUEUE
2907 vm_page_assign_background_state(mem);
2908 #endif
2909 }
2910 return mem;
2911 }
2912
2913 #if CONFIG_SECLUDED_MEMORY
2914 vm_page_t
2915 vm_page_grab_secluded(void)
2916 {
2917 vm_page_t mem;
2918 vm_object_t object;
2919 int refmod_state;
2920
2921 if (vm_page_secluded_count == 0) {
2922 /* no secluded pages to grab... */
2923 return VM_PAGE_NULL;
2924 }
2925
2926 /* secluded queue is protected by the VM page queue lock */
2927 vm_page_lock_queues();
2928
2929 if (vm_page_secluded_count == 0) {
2930 /* no secluded pages to grab... */
2931 vm_page_unlock_queues();
2932 return VM_PAGE_NULL;
2933 }
2934
2935 #if 00
2936 /* can we grab from the secluded queue? */
2937 if (vm_page_secluded_count > vm_page_secluded_target ||
2938 (vm_page_secluded_count > 0 &&
2939 task_can_use_secluded_mem(current_task()))) {
2940 /* OK */
2941 } else {
2942 /* can't grab from secluded queue... */
2943 vm_page_unlock_queues();
2944 return VM_PAGE_NULL;
2945 }
2946 #endif
2947
2948 /* we can grab a page from secluded queue! */
2949 assert((vm_page_secluded_count_free +
2950 vm_page_secluded_count_inuse) ==
2951 vm_page_secluded_count);
2952 if (current_task()->task_can_use_secluded_mem) {
2953 assert(num_tasks_can_use_secluded_mem > 0);
2954 }
2955 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2956 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2957 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
2958 assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
2959 vm_page_queues_remove(mem, TRUE);
2960
2961 object = VM_PAGE_OBJECT(mem);
2962
2963 assert(!mem->fictitious);
2964 assert(!VM_PAGE_WIRED(mem));
2965 if (object == VM_OBJECT_NULL) {
2966 /* free for grab! */
2967 vm_page_unlock_queues();
2968 vm_page_secluded.grab_success_free++;
2969
2970 assert(mem->busy);
2971 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2972 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
2973 assert(mem->pageq.next == 0);
2974 assert(mem->pageq.prev == 0);
2975 assert(mem->listq.next == 0);
2976 assert(mem->listq.prev == 0);
2977 #if CONFIG_BACKGROUND_QUEUE
2978 assert(mem->vm_page_on_backgroundq == 0);
2979 assert(mem->vm_page_backgroundq.next == 0);
2980 assert(mem->vm_page_backgroundq.prev == 0);
2981 #endif /* CONFIG_BACKGROUND_QUEUE */
2982 return mem;
2983 }
2984
2985 assert(!object->internal);
2986 // vm_page_pageable_external_count--;
2987
2988 if (!vm_object_lock_try(object)) {
2989 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
2990 vm_page_secluded.grab_failure_locked++;
2991 reactivate_secluded_page:
2992 vm_page_activate(mem);
2993 vm_page_unlock_queues();
2994 return VM_PAGE_NULL;
2995 }
2996 if (mem->busy ||
2997 mem->cleaning ||
2998 mem->laundry) {
2999 /* can't steal page in this state... */
3000 vm_object_unlock(object);
3001 vm_page_secluded.grab_failure_state++;
3002 goto reactivate_secluded_page;
3003 }
3004
3005 mem->busy = TRUE;
3006 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3007 if (refmod_state & VM_MEM_REFERENCED) {
3008 mem->reference = TRUE;
3009 }
3010 if (refmod_state & VM_MEM_MODIFIED) {
3011 SET_PAGE_DIRTY(mem, FALSE);
3012 }
3013 if (mem->dirty || mem->precious) {
3014 /* can't grab a dirty page; re-activate */
3015 // printf("SECLUDED: dirty page %p\n", mem);
3016 PAGE_WAKEUP_DONE(mem);
3017 vm_page_secluded.grab_failure_dirty++;
3018 vm_object_unlock(object);
3019 goto reactivate_secluded_page;
3020 }
3021 if (mem->reference) {
3022 /* it's been used but we do need to grab a page... */
3023 }
3024
3025 vm_page_unlock_queues();
3026
3027 /* finish what vm_page_free() would have done... */
3028 vm_page_free_prepare_object(mem, TRUE);
3029 vm_object_unlock(object);
3030 object = VM_OBJECT_NULL;
3031 if (vm_page_free_verify) {
3032 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3033 }
3034 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3035 vm_page_secluded.grab_success_other++;
3036
3037 assert(mem->busy);
3038 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3039 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3040 assert(mem->pageq.next == 0);
3041 assert(mem->pageq.prev == 0);
3042 assert(mem->listq.next == 0);
3043 assert(mem->listq.prev == 0);
3044 #if CONFIG_BACKGROUND_QUEUE
3045 assert(mem->vm_page_on_backgroundq == 0);
3046 assert(mem->vm_page_backgroundq.next == 0);
3047 assert(mem->vm_page_backgroundq.prev == 0);
3048 #endif /* CONFIG_BACKGROUND_QUEUE */
3049
3050 return mem;
3051 }
3052 #endif /* CONFIG_SECLUDED_MEMORY */
3053
3054 /*
3055 * vm_page_release:
3056 *
3057 * Return a page to the free list.
3058 */
3059
3060 void
3061 vm_page_release(
3062 vm_page_t mem,
3063 boolean_t page_queues_locked)
3064 {
3065 unsigned int color;
3066 int need_wakeup = 0;
3067 int need_priv_wakeup = 0;
3068 #if CONFIG_SECLUDED_MEMORY
3069 int need_secluded_wakeup = 0;
3070 #endif /* CONFIG_SECLUDED_MEMORY */
3071
3072 if (page_queues_locked) {
3073 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3074 } else {
3075 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3076 }
3077
3078 assert(!mem->private && !mem->fictitious);
3079 if (vm_page_free_verify) {
3080 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3081 }
3082 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3083
3084 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3085
3086 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3087
3088 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3089 assert(mem->busy);
3090 assert(!mem->laundry);
3091 assert(mem->vm_page_object == 0);
3092 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3093 assert(mem->listq.next == 0 && mem->listq.prev == 0);
3094 #if CONFIG_BACKGROUND_QUEUE
3095 assert(mem->vm_page_backgroundq.next == 0 &&
3096 mem->vm_page_backgroundq.prev == 0 &&
3097 mem->vm_page_on_backgroundq == FALSE);
3098 #endif
3099 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3100 vm_lopage_free_count < vm_lopage_free_limit &&
3101 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3102 /*
3103 * this exists to support hardware controllers
3104 * incapable of generating DMAs with more than 32 bits
3105 * of address on platforms with physical memory > 4G...
3106 */
3107 vm_page_queue_enter_first(&vm_lopage_queue_free,
3108 mem,
3109 vm_page_t,
3110 pageq);
3111 vm_lopage_free_count++;
3112
3113 if (vm_lopage_free_count >= vm_lopage_free_limit)
3114 vm_lopage_refill = FALSE;
3115
3116 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3117 mem->lopage = TRUE;
3118 #if CONFIG_SECLUDED_MEMORY
3119 } else if (vm_page_free_count > vm_page_free_reserved &&
3120 vm_page_secluded_count < vm_page_secluded_target &&
3121 num_tasks_can_use_secluded_mem == 0) {
3122 /*
3123 * XXX FBDP TODO: also avoid refilling secluded queue
3124 * when some IOKit objects are already grabbing from it...
3125 */
3126 if (!page_queues_locked) {
3127 if (!vm_page_trylock_queues()) {
3128 /* take locks in right order */
3129 lck_mtx_unlock(&vm_page_queue_free_lock);
3130 vm_page_lock_queues();
3131 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3132 }
3133 }
3134 mem->lopage = FALSE;
3135 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3136 vm_page_queue_enter_first(&vm_page_queue_secluded,
3137 mem,
3138 vm_page_t,
3139 pageq);
3140 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3141 vm_page_secluded_count++;
3142 vm_page_secluded_count_free++;
3143 if (!page_queues_locked) {
3144 vm_page_unlock_queues();
3145 }
3146 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3147 if (vm_page_free_wanted_secluded > 0) {
3148 vm_page_free_wanted_secluded--;
3149 need_secluded_wakeup = 1;
3150 }
3151 #endif /* CONFIG_SECLUDED_MEMORY */
3152 } else {
3153 mem->lopage = FALSE;
3154 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3155
3156 color = VM_PAGE_GET_COLOR(mem);
3157 #if defined(__x86_64__)
3158 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3159 mem,
3160 vm_page_t,
3161 pageq);
3162 #else
3163 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3164 mem,
3165 vm_page_t,
3166 pageq);
3167 #endif
3168 vm_page_free_count++;
3169 /*
3170 * Check if we should wake up someone waiting for page.
3171 * But don't bother waking them unless they can allocate.
3172 *
3173 * We wakeup only one thread, to prevent starvation.
3174 * Because the scheduling system handles wait queues FIFO,
3175 * if we wake up all waiting threads, one greedy thread
3176 * can starve multiple niceguy threads. When the threads
3177 * all wake up, the greedy thread runs first, grabs the page,
3178 * and waits for another page. It will be the first to run
3179 * when the next page is freed.
3180 *
3181 * However, there is a slight danger here.
3182 * The thread we wake might not use the free page.
3183 * Then the other threads could wait indefinitely
3184 * while the page goes unused. To forestall this,
3185 * the pageout daemon will keep making free pages
3186 * as long as vm_page_free_wanted is non-zero.
3187 */
3188
3189 assert(vm_page_free_count > 0);
3190 if (vm_page_free_wanted_privileged > 0) {
3191 vm_page_free_wanted_privileged--;
3192 need_priv_wakeup = 1;
3193 #if CONFIG_SECLUDED_MEMORY
3194 } else if (vm_page_free_wanted_secluded > 0 &&
3195 vm_page_free_count > vm_page_free_reserved) {
3196 vm_page_free_wanted_secluded--;
3197 need_secluded_wakeup = 1;
3198 #endif /* CONFIG_SECLUDED_MEMORY */
3199 } else if (vm_page_free_wanted > 0 &&
3200 vm_page_free_count > vm_page_free_reserved) {
3201 vm_page_free_wanted--;
3202 need_wakeup = 1;
3203 }
3204 }
3205 lck_mtx_unlock(&vm_page_queue_free_lock);
3206
3207 if (need_priv_wakeup)
3208 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3209 #if CONFIG_SECLUDED_MEMORY
3210 else if (need_secluded_wakeup)
3211 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3212 #endif /* CONFIG_SECLUDED_MEMORY */
3213 else if (need_wakeup)
3214 thread_wakeup_one((event_t) &vm_page_free_count);
3215
3216 VM_CHECK_MEMORYSTATUS;
3217 }
3218
3219 /*
3220 * This version of vm_page_release() is used only at startup
3221 * when we are single-threaded and pages are being released
3222 * for the first time. Hence, no locking or unnecessary checks are made.
3223 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3224 */
3225 void
3226 vm_page_release_startup(
3227 vm_page_t mem)
3228 {
3229 vm_page_queue_t queue_free;
3230
3231 if (vm_lopage_free_count < vm_lopage_free_limit &&
3232 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3233 mem->lopage = TRUE;
3234 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3235 vm_lopage_free_count++;
3236 queue_free = &vm_lopage_queue_free;
3237 #if CONFIG_SECLUDED_MEMORY
3238 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3239 mem->lopage = FALSE;
3240 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3241 vm_page_secluded_count++;
3242 vm_page_secluded_count_free++;
3243 queue_free = &vm_page_queue_secluded;
3244 #endif /* CONFIG_SECLUDED_MEMORY */
3245 } else {
3246 mem->lopage = FALSE;
3247 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3248 vm_page_free_count++;
3249 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3250 }
3251 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
3252 #if defined(__x86_64__)
3253 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, pageq);
3254 #else
3255 vm_page_queue_enter(queue_free, mem, vm_page_t, pageq);
3256 #endif
3257 } else
3258 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
3259 }
3260
3261 /*
3262 * vm_page_wait:
3263 *
3264 * Wait for a page to become available.
3265 * If there are plenty of free pages, then we don't sleep.
3266 *
3267 * Returns:
3268 * TRUE: There may be another page, try again
3269 * FALSE: We were interrupted out of our wait, don't try again
3270 */
3271
3272 boolean_t
3273 vm_page_wait(
3274 int interruptible )
3275 {
3276 /*
3277 * We can't use vm_page_free_reserved to make this
3278 * determination. Consider: some thread might
3279 * need to allocate two pages. The first allocation
3280 * succeeds, the second fails. After the first page is freed,
3281 * a call to vm_page_wait must really block.
3282 */
3283 kern_return_t wait_result;
3284 int need_wakeup = 0;
3285 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3286
3287 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3288
3289 if (is_privileged && vm_page_free_count) {
3290 lck_mtx_unlock(&vm_page_queue_free_lock);
3291 return TRUE;
3292 }
3293
3294 if (vm_page_free_count >= vm_page_free_target) {
3295 lck_mtx_unlock(&vm_page_queue_free_lock);
3296 return TRUE;
3297 }
3298
3299 if (is_privileged) {
3300 if (vm_page_free_wanted_privileged++ == 0)
3301 need_wakeup = 1;
3302 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3303 #if CONFIG_SECLUDED_MEMORY
3304 } else if (secluded_for_apps &&
3305 task_can_use_secluded_mem(current_task())) {
3306 #if 00
3307 /* XXX FBDP: need pageq lock for this... */
3308 /* XXX FBDP: might wait even if pages available, */
3309 /* XXX FBDP: hopefully not for too long... */
3310 if (vm_page_secluded_count > 0) {
3311 lck_mtx_unlock(&vm_page_queue_free_lock);
3312 return TRUE;
3313 }
3314 #endif
3315 if (vm_page_free_wanted_secluded++ == 0) {
3316 need_wakeup = 1;
3317 }
3318 wait_result = assert_wait(
3319 (event_t)&vm_page_free_wanted_secluded,
3320 interruptible);
3321 #endif /* CONFIG_SECLUDED_MEMORY */
3322 } else {
3323 if (vm_page_free_wanted++ == 0)
3324 need_wakeup = 1;
3325 wait_result = assert_wait((event_t)&vm_page_free_count,
3326 interruptible);
3327 }
3328 lck_mtx_unlock(&vm_page_queue_free_lock);
3329 counter(c_vm_page_wait_block++);
3330
3331 if (need_wakeup)
3332 thread_wakeup((event_t)&vm_page_free_wanted);
3333
3334 if (wait_result == THREAD_WAITING) {
3335 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3336 vm_page_free_wanted_privileged,
3337 vm_page_free_wanted,
3338 #if CONFIG_SECLUDED_MEMORY
3339 vm_page_free_wanted_secluded,
3340 #else /* CONFIG_SECLUDED_MEMORY */
3341 0,
3342 #endif /* CONFIG_SECLUDED_MEMORY */
3343 0);
3344 wait_result = thread_block(THREAD_CONTINUE_NULL);
3345 VM_DEBUG_EVENT(vm_page_wait_block,
3346 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3347 }
3348
3349 return (wait_result == THREAD_AWAKENED);
3350 }
3351
3352 /*
3353 * vm_page_alloc:
3354 *
3355 * Allocate and return a memory cell associated
3356 * with this VM object/offset pair.
3357 *
3358 * Object must be locked.
3359 */
3360
3361 vm_page_t
3362 vm_page_alloc(
3363 vm_object_t object,
3364 vm_object_offset_t offset)
3365 {
3366 vm_page_t mem;
3367 int grab_options;
3368
3369 vm_object_lock_assert_exclusive(object);
3370 grab_options = 0;
3371 #if CONFIG_SECLUDED_MEMORY
3372 if (object->can_grab_secluded) {
3373 grab_options |= VM_PAGE_GRAB_SECLUDED;
3374 }
3375 #endif /* CONFIG_SECLUDED_MEMORY */
3376 mem = vm_page_grab_options(grab_options);
3377 if (mem == VM_PAGE_NULL)
3378 return VM_PAGE_NULL;
3379
3380 vm_page_insert(mem, object, offset);
3381
3382 return(mem);
3383 }
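
/*
 * Illustrative sketch of the usual vm_page_alloc() retry pattern: a
 * hypothetical helper drops the object lock while waiting for free memory,
 * then re-takes it and retries.  A real caller must also re-validate the
 * object/offset state after re-acquiring the lock, since another thread
 * may have raced in.
 */
#if 0
static vm_page_t
example_alloc_page_wait(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	}
	return m;
}
#endif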
3384
3385 /*
3386 * vm_page_alloc_guard:
3387 *
3388 * Allocate a fictitious page which will be used
3389 * as a guard page. The page will be inserted into
3390 * the object and returned to the caller.
3391 */
3392
3393 vm_page_t
3394 vm_page_alloc_guard(
3395 vm_object_t object,
3396 vm_object_offset_t offset)
3397 {
3398 vm_page_t mem;
3399
3400 vm_object_lock_assert_exclusive(object);
3401 mem = vm_page_grab_guard();
3402 if (mem == VM_PAGE_NULL)
3403 return VM_PAGE_NULL;
3404
3405 vm_page_insert(mem, object, offset);
3406
3407 return(mem);
3408 }
3409
3410
3411 counter(unsigned int c_laundry_pages_freed = 0;)
3412
3413 /*
3414 * vm_page_free_prepare:
3415 *
3416 * Removes page from any queue it may be on
3417 * and disassociates it from its VM object.
3418 *
3419 * Object and page queues must be locked prior to entry.
3420 */
3421 static void
3422 vm_page_free_prepare(
3423 vm_page_t mem)
3424 {
3425 vm_page_free_prepare_queues(mem);
3426 vm_page_free_prepare_object(mem, TRUE);
3427 }
3428
3429
3430 void
3431 vm_page_free_prepare_queues(
3432 vm_page_t mem)
3433 {
3434 vm_object_t m_object;
3435
3436 VM_PAGE_CHECK(mem);
3437
3438 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
3439 assert(!mem->cleaning);
3440 m_object = VM_PAGE_OBJECT(mem);
3441
3442 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3443 if (m_object) {
3444 vm_object_lock_assert_exclusive(m_object);
3445 }
3446 if (mem->laundry) {
3447 /*
3448 * We may have to free a page while it's being laundered
3449 * if we lost its pager (due to a forced unmount, for example).
3450 * We need to call vm_pageout_steal_laundry() before removing
3451 * the page from its VM object, so that we can remove it
3452 * from its pageout queue and adjust the laundry accounting.
3453 */
3454 vm_pageout_steal_laundry(mem, TRUE);
3455 counter(++c_laundry_pages_freed);
3456 }
3457
3458 vm_page_queues_remove(mem, TRUE);
3459
3460 if (VM_PAGE_WIRED(mem)) {
3461 assert(mem->wire_count > 0);
3462
3463 if (m_object) {
3464
3465 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3466 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3467 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3468
3469 assert(m_object->resident_page_count >=
3470 m_object->wired_page_count);
3471
3472 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3473 OSAddAtomic(+1, &vm_page_purgeable_count);
3474 assert(vm_page_purgeable_wired_count > 0);
3475 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3476 }
3477 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3478 m_object->purgable == VM_PURGABLE_EMPTY) &&
3479 m_object->vo_purgeable_owner != TASK_NULL) {
3480 task_t owner;
3481
3482 owner = m_object->vo_purgeable_owner;
3483 /*
3484 * While wired, this page was accounted
3485 * as "non-volatile" but it should now
3486 * be accounted as "volatile".
3487 */
3488 /* one less "non-volatile"... */
3489 ledger_debit(owner->ledger,
3490 task_ledgers.purgeable_nonvolatile,
3491 PAGE_SIZE);
3492 /* ... and "phys_footprint" */
3493 ledger_debit(owner->ledger,
3494 task_ledgers.phys_footprint,
3495 PAGE_SIZE);
3496 /* one more "volatile" */
3497 ledger_credit(owner->ledger,
3498 task_ledgers.purgeable_volatile,
3499 PAGE_SIZE);
3500 }
3501 }
3502 if (!mem->private && !mem->fictitious)
3503 vm_page_wire_count--;
3504
3505 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3506 mem->wire_count = 0;
3507 assert(!mem->gobbled);
3508 } else if (mem->gobbled) {
3509 if (!mem->private && !mem->fictitious)
3510 vm_page_wire_count--;
3511 vm_page_gobble_count--;
3512 }
3513 }
3514
3515
3516 void
3517 vm_page_free_prepare_object(
3518 vm_page_t mem,
3519 boolean_t remove_from_hash)
3520 {
3521 if (mem->tabled)
3522 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3523
3524 PAGE_WAKEUP(mem); /* clears wanted */
3525
3526 if (mem->private) {
3527 mem->private = FALSE;
3528 mem->fictitious = TRUE;
3529 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3530 }
3531 if ( !mem->fictitious) {
3532 assert(mem->pageq.next == 0);
3533 assert(mem->pageq.prev == 0);
3534 assert(mem->listq.next == 0);
3535 assert(mem->listq.prev == 0);
3536 #if CONFIG_BACKGROUND_QUEUE
3537 assert(mem->vm_page_backgroundq.next == 0);
3538 assert(mem->vm_page_backgroundq.prev == 0);
3539 #endif /* CONFIG_BACKGROUND_QUEUE */
3540 assert(mem->next_m == 0);
3541 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
3542 }
3543 }
3544
3545
3546 /*
3547 * vm_page_free:
3548 *
3549 * Returns the given page to the free list,
3550 * disassociating it from any VM object.
3551 *
3552 * Object and page queues must be locked prior to entry.
3553 */
3554 void
3555 vm_page_free(
3556 vm_page_t mem)
3557 {
3558 vm_page_free_prepare(mem);
3559
3560 if (mem->fictitious) {
3561 vm_page_release_fictitious(mem);
3562 } else {
3563 vm_page_release(mem,
3564 TRUE); /* page queues are locked */
3565 }
3566 }
3567
3568
3569 void
3570 vm_page_free_unlocked(
3571 vm_page_t mem,
3572 boolean_t remove_from_hash)
3573 {
3574 vm_page_lockspin_queues();
3575 vm_page_free_prepare_queues(mem);
3576 vm_page_unlock_queues();
3577
3578 vm_page_free_prepare_object(mem, remove_from_hash);
3579
3580 if (mem->fictitious) {
3581 vm_page_release_fictitious(mem);
3582 } else {
3583 vm_page_release(mem, FALSE); /* page queues are not locked */
3584 }
3585 }
3586
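/*
 * Illustrative sketch of the two freeing paths above, assuming "m" is a
 * tabled page whose object is locked exclusively by the caller:
 * vm_page_free() requires the page queues lock to already be held, while
 * vm_page_free_unlocked() takes and drops it internally.
 */
#if 0
static void
example_free_resident_page(
	vm_page_t	m,
	boolean_t	queues_locked)
{
	if (queues_locked) {
		vm_page_free(m);		/* page queues lock already held */
	} else {
		vm_page_free_unlocked(m, TRUE);	/* takes the queues lock itself */
	}
}
#endif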
3587
3588 /*
3589 * Free a list of pages. The list can be up to several hundred pages,
3590 * as batched up by vm_pageout_scan().
3591 * The big win is not having to take the free list lock once
3592 * per page.
3593 *
3594 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3595 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3596 */
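
/*
 * Illustrative sketch of how a caller batches pages for vm_page_free_list()
 * below: pages already removed from the paging queues are chained through
 * their "snext" field and handed over in one call, so the free-list lock is
 * taken once per batch rather than once per page.
 */
#if 0
static void
example_free_page_batch(vm_page_t *pages, unsigned int count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < count; i++) {
		pages[i]->snext = local_freeq;
		local_freeq = pages[i];
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);
}
#endif
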
3597 void
3598 vm_page_free_list(
3599 vm_page_t freeq,
3600 boolean_t prepare_object)
3601 {
3602 vm_page_t mem;
3603 vm_page_t nxt;
3604 vm_page_t local_freeq;
3605 int pg_count;
3606
3607 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3608 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3609
3610 while (freeq) {
3611
3612 pg_count = 0;
3613 local_freeq = VM_PAGE_NULL;
3614 mem = freeq;
3615
3616 /*
3617 * break up the processing into smaller chunks so
3618 * that we can 'pipeline' the pages onto the
3619 * free list w/o introducing too much
3620 * contention on the global free queue lock
3621 */
3622 while (mem && pg_count < 64) {
3623
3624 assert((mem->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
3625 (mem->vm_page_q_state == VM_PAGE_IS_WIRED));
3626 #if CONFIG_BACKGROUND_QUEUE
3627 assert(mem->vm_page_backgroundq.next == 0 &&
3628 mem->vm_page_backgroundq.prev == 0 &&
3629 mem->vm_page_on_backgroundq == FALSE);
3630 #endif
3631 nxt = mem->snext;
3632 mem->snext = NULL;
3633 assert(mem->pageq.prev == 0);
3634
3635 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
3636 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3637 }
3638 if (prepare_object == TRUE)
3639 vm_page_free_prepare_object(mem, TRUE);
3640
3641 if (!mem->fictitious) {
3642 assert(mem->busy);
3643
3644 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3645 vm_lopage_free_count < vm_lopage_free_limit &&
3646 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3647 vm_page_release(mem, FALSE); /* page queues are not locked */
3648 #if CONFIG_SECLUDED_MEMORY
3649 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3650 num_tasks_can_use_secluded_mem == 0) {
3651 vm_page_release(mem,
3652 FALSE); /* page queues are not locked */
3653 #endif /* CONFIG_SECLUDED_MEMORY */
3654 } else {
3655 /*
3656 * IMPORTANT: we can't set the page "free" here
3657 * because that would make the page eligible for
3658 * a physically-contiguous allocation (see
3659 * vm_page_find_contiguous()) right away (we don't
3660 * hold the vm_page_queue_free lock). That would
3661 * cause trouble because the page is not actually
3662 * in the free queue yet...
3663 */
3664 mem->snext = local_freeq;
3665 local_freeq = mem;
3666 pg_count++;
3667
3668 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3669 }
3670 } else {
3671 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3672 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3673 vm_page_release_fictitious(mem);
3674 }
3675 mem = nxt;
3676 }
3677 freeq = mem;
3678
3679 if ( (mem = local_freeq) ) {
3680 unsigned int avail_free_count;
3681 unsigned int need_wakeup = 0;
3682 unsigned int need_priv_wakeup = 0;
3683 #if CONFIG_SECLUDED_MEMORY
3684 unsigned int need_wakeup_secluded = 0;
3685 #endif /* CONFIG_SECLUDED_MEMORY */
3686
3687 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3688
3689 while (mem) {
3690 int color;
3691
3692 nxt = mem->snext;
3693
3694 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3695 assert(mem->busy);
3696 mem->lopage = FALSE;
3697 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3698
3699 color = VM_PAGE_GET_COLOR(mem);
3700 #if defined(__x86_64__)
3701 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3702 mem,
3703 vm_page_t,
3704 pageq);
3705 #else
3706 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3707 mem,
3708 vm_page_t,
3709 pageq);
3710 #endif
3711 mem = nxt;
3712 }
3713 vm_page_free_count += pg_count;
3714 avail_free_count = vm_page_free_count;
3715
3716 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3717
3718 if (avail_free_count < vm_page_free_wanted_privileged) {
3719 need_priv_wakeup = avail_free_count;
3720 vm_page_free_wanted_privileged -= avail_free_count;
3721 avail_free_count = 0;
3722 } else {
3723 need_priv_wakeup = vm_page_free_wanted_privileged;
3724 avail_free_count -= vm_page_free_wanted_privileged;
3725 vm_page_free_wanted_privileged = 0;
3726 }
3727 }
3728 #if CONFIG_SECLUDED_MEMORY
3729 if (vm_page_free_wanted_secluded > 0 &&
3730 avail_free_count > vm_page_free_reserved) {
3731 unsigned int available_pages;
3732 available_pages = (avail_free_count -
3733 vm_page_free_reserved);
3734 if (available_pages <
3735 vm_page_free_wanted_secluded) {
3736 need_wakeup_secluded = available_pages;
3737 vm_page_free_wanted_secluded -=
3738 available_pages;
3739 avail_free_count -= available_pages;
3740 } else {
3741 need_wakeup_secluded =
3742 vm_page_free_wanted_secluded;
3743 avail_free_count -=
3744 vm_page_free_wanted_secluded;
3745 vm_page_free_wanted_secluded = 0;
3746 }
3747 }
3748 #endif /* CONFIG_SECLUDED_MEMORY */
3749 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3750 unsigned int available_pages;
3751
3752 available_pages = avail_free_count - vm_page_free_reserved;
3753
3754 if (available_pages >= vm_page_free_wanted) {
3755 need_wakeup = vm_page_free_wanted;
3756 vm_page_free_wanted = 0;
3757 } else {
3758 need_wakeup = available_pages;
3759 vm_page_free_wanted -= available_pages;
3760 }
3761 }
3762 lck_mtx_unlock(&vm_page_queue_free_lock);
3763
3764 if (need_priv_wakeup != 0) {
3765 /*
3766 * There shouldn't be that many VM-privileged threads,
3767 * so let's wake them all up, even if we don't quite
3768 * have enough pages to satisfy them all.
3769 */
3770 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3771 }
3772 #if CONFIG_SECLUDED_MEMORY
3773 if (need_wakeup_secluded != 0 &&
3774 vm_page_free_wanted_secluded == 0) {
3775 thread_wakeup((event_t)
3776 &vm_page_free_wanted_secluded);
3777 } else {
3778 for (;
3779 need_wakeup_secluded != 0;
3780 need_wakeup_secluded--) {
3781 thread_wakeup_one(
3782 (event_t)
3783 &vm_page_free_wanted_secluded);
3784 }
3785 }
3786 #endif /* CONFIG_SECLUDED_MEMORY */
3787 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3788 /*
3789 * We don't expect to have any more waiters
3790 * after this, so let's wake them all up at
3791 * once.
3792 */
3793 thread_wakeup((event_t) &vm_page_free_count);
3794 } else for (; need_wakeup != 0; need_wakeup--) {
3795 /*
3796 * Wake up one waiter per page we just released.
3797 */
3798 thread_wakeup_one((event_t) &vm_page_free_count);
3799 }
3800
3801 VM_CHECK_MEMORYSTATUS;
3802 }
3803 }
3804 }
3805
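/*
 * Editor's note -- illustrative sketch only, not part of this file.  Callers
 * of vm_page_free_list() chain their pages through the "snext" field and pass
 * the head of that singly-linked list; neither the page queues lock nor the
 * free queue lock may be held.  The example_free_batch() helper below is
 * hypothetical, compiled out as an illustration:
 */
#if 0	/* illustration only */
static void
example_free_batch(vm_page_t *pages, unsigned int count)
{
	vm_page_t	freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < count; i++) {
		pages[i]->snext = freeq;	/* build the snext chain */
		freeq = pages[i];
	}
	/* TRUE: also run vm_page_free_prepare_object() on each page */
	vm_page_free_list(freeq, TRUE);
}
#endif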
3806
3807 /*
3808 * vm_page_wire:
3809 *
3810 * Mark this page as wired down by yet
3811 * another map, removing it from paging queues
3812 * as necessary.
3813 *
3814 * The page's object and the page queues must be locked.
3815 */
3816
3817
3818 void
3819 vm_page_wire(
3820 vm_page_t mem,
3821 vm_tag_t tag,
3822 boolean_t check_memorystatus)
3823 {
3824 vm_object_t m_object;
3825
3826 m_object = VM_PAGE_OBJECT(mem);
3827
3828 // dbgLog(current_thread(), mem->offset, m_object, 1); /* (TEST/DEBUG) */
3829
3830 VM_PAGE_CHECK(mem);
3831 if (m_object) {
3832 vm_object_lock_assert_exclusive(m_object);
3833 } else {
3834 /*
3835 * In theory, the page should be in an object before it
3836 * gets wired, since we need to hold the object lock
3837 * to update some fields in the page structure.
3838 * However, some code (i386 pmap, for example) might want
3839 * to wire a page before it gets inserted into an object.
3840 * That's somewhat OK, as long as nobody else can get to
3841 * that page and update it at the same time.
3842 */
3843 }
3844 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3845 if ( !VM_PAGE_WIRED(mem)) {
3846
3847 if (mem->laundry)
3848 vm_pageout_steal_laundry(mem, TRUE);
3849
3850 vm_page_queues_remove(mem, TRUE);
3851
3852 assert(mem->wire_count == 0);
3853 mem->vm_page_q_state = VM_PAGE_IS_WIRED;
3854
3855 if (m_object) {
3856
3857 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3858 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
3859 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3860
3861 assert(m_object->resident_page_count >=
3862 m_object->wired_page_count);
3863 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3864 assert(vm_page_purgeable_count > 0);
3865 OSAddAtomic(-1, &vm_page_purgeable_count);
3866 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3867 }
3868 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3869 m_object->purgable == VM_PURGABLE_EMPTY) &&
3870 m_object->vo_purgeable_owner != TASK_NULL) {
3871 task_t owner;
3872
3873 owner = m_object->vo_purgeable_owner;
3874 /* less volatile bytes */
3875 ledger_debit(owner->ledger,
3876 task_ledgers.purgeable_volatile,
3877 PAGE_SIZE);
3878 /* more not-quite-volatile bytes */
3879 ledger_credit(owner->ledger,
3880 task_ledgers.purgeable_nonvolatile,
3881 PAGE_SIZE);
3882 /* more footprint */
3883 ledger_credit(owner->ledger,
3884 task_ledgers.phys_footprint,
3885 PAGE_SIZE);
3886 }
3887 if (m_object->all_reusable) {
3888 /*
3889 * Wired pages are not counted as "re-usable"
3890 * in "all_reusable" VM objects, so nothing
3891 * to do here.
3892 */
3893 } else if (mem->reusable) {
3894 /*
3895 * This page is not "re-usable" when it's
3896 * wired, so adjust its state and the
3897 * accounting.
3898 */
3899 vm_object_reuse_pages(m_object,
3900 mem->offset,
3901 mem->offset+PAGE_SIZE_64,
3902 FALSE);
3903 }
3904 }
3905 assert(!mem->reusable);
3906
3907 if (!mem->private && !mem->fictitious && !mem->gobbled)
3908 vm_page_wire_count++;
3909 if (mem->gobbled)
3910 vm_page_gobble_count--;
3911 mem->gobbled = FALSE;
3912
3913 if (check_memorystatus == TRUE) {
3914 VM_CHECK_MEMORYSTATUS;
3915 }
3916 }
3917 assert(!mem->gobbled);
3918 assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
3919 mem->wire_count++;
3920 if (__improbable(mem->wire_count == 0)) {
3921 panic("vm_page_wire(%p): wire_count overflow", mem);
3922 }
3923 VM_PAGE_CHECK(mem);
3924 }
3925
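/*
 * Editor's note -- illustrative sketch only, not part of this file.
 * vm_page_wire() and vm_page_unwire() must be called with the page's object
 * locked exclusively and the page queues lock held; wirings nest through
 * wire_count.  The helper below is hypothetical, and VM_KERN_MEMORY_NONE is
 * just a placeholder for whatever tag a real caller would use:
 */
#if 0	/* illustration only */
static void
example_wire_then_unwire(vm_object_t object, vm_page_t mem)
{
	vm_object_lock(object);
	vm_page_lockspin_queues();
	vm_page_wire(mem, VM_KERN_MEMORY_NONE, TRUE);	/* TRUE: run the memorystatus check */
	/* ... use the wired page ... */
	vm_page_unwire(mem, TRUE);	/* TRUE: re-queue the page once fully unwired */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif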
3926 /*
3927 * vm_page_unwire:
3928 *
3929 * Release one wiring of this page, potentially
3930 * enabling it to be paged again.
3931 *
3932 * The page's object and the page queues must be locked.
3933 */
3934 void
3935 vm_page_unwire(
3936 vm_page_t mem,
3937 boolean_t queueit)
3938 {
3939 vm_object_t m_object;
3940
3941 m_object = VM_PAGE_OBJECT(mem);
3942
3943 // dbgLog(current_thread(), mem->offset, m_object, 0); /* (TEST/DEBUG) */
3944
3945 VM_PAGE_CHECK(mem);
3946 assert(VM_PAGE_WIRED(mem));
3947 assert(mem->wire_count > 0);
3948 assert(!mem->gobbled);
3949 assert(m_object != VM_OBJECT_NULL);
3950 vm_object_lock_assert_exclusive(m_object);
3951 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3952 if (--mem->wire_count == 0) {
3953
3954 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3955
3956 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3957 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3958 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3959 if (!mem->private && !mem->fictitious) {
3960 vm_page_wire_count--;
3961 }
3962
3963 assert(m_object->resident_page_count >=
3964 m_object->wired_page_count);
3965 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3966 OSAddAtomic(+1, &vm_page_purgeable_count);
3967 assert(vm_page_purgeable_wired_count > 0);
3968 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3969 }
3970 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3971 m_object->purgable == VM_PURGABLE_EMPTY) &&
3972 m_object->vo_purgeable_owner != TASK_NULL) {
3973 task_t owner;
3974
3975 owner = m_object->vo_purgeable_owner;
3976 /* more volatile bytes */
3977 ledger_credit(owner->ledger,
3978 task_ledgers.purgeable_volatile,
3979 PAGE_SIZE);
3980 /* less not-quite-volatile bytes */
3981 ledger_debit(owner->ledger,
3982 task_ledgers.purgeable_nonvolatile,
3983 PAGE_SIZE);
3984 /* less footprint */
3985 ledger_debit(owner->ledger,
3986 task_ledgers.phys_footprint,
3987 PAGE_SIZE);
3988 }
3989 assert(m_object != kernel_object);
3990 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3991
3992 if (queueit == TRUE) {
3993 if (m_object->purgable == VM_PURGABLE_EMPTY) {
3994 vm_page_deactivate(mem);
3995 } else {
3996 vm_page_activate(mem);
3997 }
3998 }
3999
4000 VM_CHECK_MEMORYSTATUS;
4001
4002 }
4003 VM_PAGE_CHECK(mem);
4004 }
4005
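/*
 * Editor's note: for a page of a volatile (or empty) purgeable object with an
 * owning task, vm_page_wire() and vm_page_unwire() above keep the ledgers in
 * sync symmetrically -- taking the page's first wiring moves one page's worth
 * of bytes from "purgeable_volatile" to "purgeable_nonvolatile" and adds it to
 * "phys_footprint"; dropping the last wiring reverses all three.
 */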
4006 /*
4007 * vm_page_deactivate:
4008 *
4009 * Returns the given page to the inactive list,
4010 * indicating that no physical maps have access
4011 * to this page. [Used by the physical mapping system.]
4012 *
4013 * The page queues must be locked.
4014 */
4015 void
4016 vm_page_deactivate(
4017 vm_page_t m)
4018 {
4019 vm_page_deactivate_internal(m, TRUE);
4020 }
4021
4022
4023 void
4024 vm_page_deactivate_internal(
4025 vm_page_t m,
4026 boolean_t clear_hw_reference)
4027 {
4028 vm_object_t m_object;
4029
4030 m_object = VM_PAGE_OBJECT(m);
4031
4032 VM_PAGE_CHECK(m);
4033 assert(m_object != kernel_object);
4034 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4035
4036 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4037 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4038 /*
4039 * This page is no longer very interesting. If it was
4040 * interesting (active or inactive/referenced), then we
4041 * clear the reference bit and (re)enter it in the
4042 * inactive queue. Note wired pages should not have
4043 * their reference bit cleared.
4044 */
4045 assert ( !(m->absent && !m->unusual));
4046
4047 if (m->gobbled) { /* can this happen? */
4048 assert( !VM_PAGE_WIRED(m));
4049
4050 if (!m->private && !m->fictitious)
4051 vm_page_wire_count--;
4052 vm_page_gobble_count--;
4053 m->gobbled = FALSE;
4054 }
4055 /*
4056 * if this page is currently on the pageout queue, we can't do the
4057 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4058 * and we can't remove it manually since we would need the object lock
4059 * (which is not required here) to decrement the activity_in_progress
4060 * reference which is held on the object while the page is in the pageout queue...
4061 * just let the normal laundry processing proceed
4062 */
4063 if (m->laundry || m->private || m->fictitious ||
4064 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4065 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4066 VM_PAGE_WIRED(m)) {
4067 return;
4068 }
4069 if (!m->absent && clear_hw_reference == TRUE)
4070 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4071
4072 m->reference = FALSE;
4073 m->no_cache = FALSE;
4074
4075 if ( !VM_PAGE_INACTIVE(m)) {
4076 vm_page_queues_remove(m, FALSE);
4077
4078 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4079 m->dirty && m_object->internal &&
4080 (m_object->purgable == VM_PURGABLE_DENY ||
4081 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4082 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4083 vm_page_check_pageable_safe(m);
4084 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4085 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4086 vm_page_throttled_count++;
4087 } else {
4088 if (m_object->named && m_object->ref_count == 1) {
4089 vm_page_speculate(m, FALSE);
4090 #if DEVELOPMENT || DEBUG
4091 vm_page_speculative_recreated++;
4092 #endif
4093 } else {
4094 vm_page_enqueue_inactive(m, FALSE);
4095 }
4096 }
4097 }
4098 }
4099
4100 /*
4101 * vm_page_enqueue_cleaned
4102 *
4103 * Put the page on the cleaned queue, mark it cleaned, etc.
4104 * Being on the cleaned queue (i.e. having vm_page_q_state set to
4105 * VM_PAGE_ON_INACTIVE_CLEANED_Q) does ** NOT ** guarantee that the page is clean!
4106 *
4107 * Call with the queues lock held.
4108 */
4109
4110 void vm_page_enqueue_cleaned(vm_page_t m)
4111 {
4112 vm_object_t m_object;
4113
4114 m_object = VM_PAGE_OBJECT(m);
4115
4116 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4117 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4118 assert( !(m->absent && !m->unusual));
4119
4120 if (VM_PAGE_WIRED(m)) {
4121 return;
4122 }
4123
4124 if (m->gobbled) {
4125 if (!m->private && !m->fictitious)
4126 vm_page_wire_count--;
4127 vm_page_gobble_count--;
4128 m->gobbled = FALSE;
4129 }
4130 /*
4131 * if this page is currently on the pageout queue, we can't do the
4132 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4133 * and we can't remove it manually since we would need the object lock
4134 * (which is not required here) to decrement the activity_in_progress
4135 * reference which is held on the object while the page is in the pageout queue...
4136 * just let the normal laundry processing proceed
4137 */
4138 if (m->laundry || m->private || m->fictitious ||
4139 (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4140 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4141 return;
4142 }
4143 vm_page_queues_remove(m, FALSE);
4144
4145 vm_page_check_pageable_safe(m);
4146 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
4147 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4148 vm_page_cleaned_count++;
4149
4150 vm_page_inactive_count++;
4151 if (m_object->internal) {
4152 vm_page_pageable_internal_count++;
4153 } else {
4154 vm_page_pageable_external_count++;
4155 }
4156 #if CONFIG_BACKGROUND_QUEUE
4157 if (m->vm_page_in_background)
4158 vm_page_add_to_backgroundq(m, TRUE);
4159 #endif
4160 vm_pageout_enqueued_cleaned++;
4161 }
4162
4163 /*
4164 * vm_page_activate:
4165 *
4166 * Put the specified page on the active list (if appropriate).
4167 *
4168 * The page queues must be locked.
4169 */
4170
4171 void
4172 vm_page_activate(
4173 vm_page_t m)
4174 {
4175 vm_object_t m_object;
4176
4177 m_object = VM_PAGE_OBJECT(m);
4178
4179 VM_PAGE_CHECK(m);
4180 #ifdef FIXME_4778297
4181 assert(m_object != kernel_object);
4182 #endif
4183 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4184 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4185 assert( !(m->absent && !m->unusual));
4186
4187 if (m->gobbled) {
4188 assert( !VM_PAGE_WIRED(m));
4189 if (!m->private && !m->fictitious)
4190 vm_page_wire_count--;
4191 vm_page_gobble_count--;
4192 m->gobbled = FALSE;
4193 }
4194 /*
4195 * if this page is currently on the pageout queue, we can't do the
4196 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4197 * and we can't remove it manually since we would need the object lock
4198 * (which is not required here) to decrement the activity_in_progress
4199 * reference which is held on the object while the page is in the pageout queue...
4200 * just let the normal laundry processing proceed
4201 */
4202 if (m->laundry || m->private || m->fictitious ||
4203 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4204 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4205 return;
4206
4207 #if DEBUG
4208 if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
4209 panic("vm_page_activate: already active");
4210 #endif
4211
4212 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4213 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4214 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4215 }
4216
4217 vm_page_queues_remove(m, FALSE);
4218
4219 if ( !VM_PAGE_WIRED(m)) {
4220 vm_page_check_pageable_safe(m);
4221 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4222 m->dirty && m_object->internal &&
4223 (m_object->purgable == VM_PURGABLE_DENY ||
4224 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4225 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4226 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4227 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4228 vm_page_throttled_count++;
4229 } else {
4230 #if CONFIG_SECLUDED_MEMORY
4231 if (secluded_for_filecache &&
4232 vm_page_secluded_target != 0 &&
4233 num_tasks_can_use_secluded_mem == 0 &&
4234 m_object->eligible_for_secluded) {
4235 vm_page_queue_enter(&vm_page_queue_secluded, m,
4236 vm_page_t, pageq);
4237 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
4238 vm_page_secluded_count++;
4239 vm_page_secluded_count_inuse++;
4240 assert(!m_object->internal);
4241 // vm_page_pageable_external_count++;
4242 } else
4243 #endif /* CONFIG_SECLUDED_MEMORY */
4244 vm_page_enqueue_active(m, FALSE);
4245 }
4246 m->reference = TRUE;
4247 m->no_cache = FALSE;
4248 }
4249 VM_PAGE_CHECK(m);
4250 }
4251
4252
4253 /*
4254 * vm_page_speculate:
4255 *
4256 * Put the specified page on the speculative list (if appropriate).
4257 *
4258 * The page queues must be locked.
4259 */
4260 void
4261 vm_page_speculate(
4262 vm_page_t m,
4263 boolean_t new)
4264 {
4265 struct vm_speculative_age_q *aq;
4266 vm_object_t m_object;
4267
4268 m_object = VM_PAGE_OBJECT(m);
4269
4270 VM_PAGE_CHECK(m);
4271 vm_page_check_pageable_safe(m);
4272
4273 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4274 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4275 assert( !(m->absent && !m->unusual));
4276 assert(m_object->internal == FALSE);
4277
4278 /*
4279 * if this page is currently on the pageout queue, we can't do the
4280 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4281 * and we can't remove it manually since we would need the object lock
4282 * (which is not required here) to decrement the activity_in_progress
4283 * reference which is held on the object while the page is in the pageout queue...
4284 * just let the normal laundry processing proceed
4285 */
4286 if (m->laundry || m->private || m->fictitious ||
4287 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4288 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4289 return;
4290
4291 vm_page_queues_remove(m, FALSE);
4292
4293 if ( !VM_PAGE_WIRED(m)) {
4294 mach_timespec_t ts;
4295 clock_sec_t sec;
4296 clock_nsec_t nsec;
4297
4298 clock_get_system_nanotime(&sec, &nsec);
4299 ts.tv_sec = (unsigned int) sec;
4300 ts.tv_nsec = nsec;
4301
4302 if (vm_page_speculative_count == 0) {
4303
4304 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4305 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4306
4307 aq = &vm_page_queue_speculative[speculative_age_index];
4308
4309 /*
4310 * set the timer to begin a new group
4311 */
4312 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4313 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4314
4315 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4316 } else {
4317 aq = &vm_page_queue_speculative[speculative_age_index];
4318
4319 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4320
4321 speculative_age_index++;
4322
4323 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4324 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4325 if (speculative_age_index == speculative_steal_index) {
4326 speculative_steal_index = speculative_age_index + 1;
4327
4328 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4329 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4330 }
4331 aq = &vm_page_queue_speculative[speculative_age_index];
4332
4333 if (!vm_page_queue_empty(&aq->age_q))
4334 vm_page_speculate_ageit(aq);
4335
4336 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4337 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4338
4339 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4340 }
4341 }
4342 vm_page_enqueue_tail(&aq->age_q, &m->pageq);
4343 m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4344 vm_page_speculative_count++;
4345 vm_page_pageable_external_count++;
4346
4347 if (new == TRUE) {
4348 vm_object_lock_assert_exclusive(m_object);
4349
4350 m_object->pages_created++;
4351 #if DEVELOPMENT || DEBUG
4352 vm_page_speculative_created++;
4353 #endif
4354 }
4355 }
4356 VM_PAGE_CHECK(m);
4357 }
4358
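/*
 * Editor's note: each speculative aging bin used above is stamped with an
 * expiry time of "now + vm_page_speculative_q_age_ms" (split into tv_sec and
 * tv_nsec).  Once that deadline passes, the next call to vm_page_speculate()
 * advances speculative_age_index (wrapping between the MIN and MAX bins) and,
 * if the newly selected bin still holds pages, drains it into the AGED bin
 * via vm_page_speculate_ageit() below before stamping it with a new deadline.
 */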
4359
4360 /*
4361 * move pages from the specified aging bin to
4362 * the speculative bin that pageout_scan claims from
4363 *
4364 * The page queues must be locked.
4365 */
4366 void
4367 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4368 {
4369 struct vm_speculative_age_q *sq;
4370 vm_page_t t;
4371
4372 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4373
4374 if (vm_page_queue_empty(&sq->age_q)) {
4375 sq->age_q.next = aq->age_q.next;
4376 sq->age_q.prev = aq->age_q.prev;
4377
4378 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4379 t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4380
4381 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4382 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4383 } else {
4384 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4385 t->pageq.next = aq->age_q.next;
4386
4387 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4388 t->pageq.prev = sq->age_q.prev;
4389
4390 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4391 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4392
4393 sq->age_q.prev = aq->age_q.prev;
4394 }
4395 vm_page_queue_init(&aq->age_q);
4396 }
4397
4398
4399 void
4400 vm_page_lru(
4401 vm_page_t m)
4402 {
4403 VM_PAGE_CHECK(m);
4404 assert(VM_PAGE_OBJECT(m) != kernel_object);
4405 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4406
4407 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4408 /*
4409 * if this page is currently on the pageout queue, we can't do the
4410 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4411 * and we can't remove it manually since we would need the object lock
4412 * (which is not required here) to decrement the activity_in_progress
4413 * reference which is held on the object while the page is in the pageout queue...
4414 * just let the normal laundry processing proceed
4415 */
4416 if (m->laundry || m->private ||
4417 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4418 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4419 VM_PAGE_WIRED(m))
4420 return;
4421
4422 m->no_cache = FALSE;
4423
4424 vm_page_queues_remove(m, FALSE);
4425
4426 vm_page_enqueue_inactive(m, FALSE);
4427 }
4428
4429
4430 void
4431 vm_page_reactivate_all_throttled(void)
4432 {
4433 vm_page_t first_throttled, last_throttled;
4434 vm_page_t first_active;
4435 vm_page_t m;
4436 int extra_active_count;
4437 int extra_internal_count, extra_external_count;
4438 vm_object_t m_object;
4439
4440 if (!VM_DYNAMIC_PAGING_ENABLED())
4441 return;
4442
4443 extra_active_count = 0;
4444 extra_internal_count = 0;
4445 extra_external_count = 0;
4446 vm_page_lock_queues();
4447 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4448 /*
4449 * Switch "throttled" pages to "active".
4450 */
4451 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
4452 VM_PAGE_CHECK(m);
4453 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
4454
4455 m_object = VM_PAGE_OBJECT(m);
4456
4457 extra_active_count++;
4458 if (m_object->internal) {
4459 extra_internal_count++;
4460 } else {
4461 extra_external_count++;
4462 }
4463
4464 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4465 VM_PAGE_CHECK(m);
4466 #if CONFIG_BACKGROUND_QUEUE
4467 if (m->vm_page_in_background)
4468 vm_page_add_to_backgroundq(m, FALSE);
4469 #endif
4470 }
4471
4472 /*
4473 * Transfer the entire throttled queue to the regular LRU page queues.
4474 * We insert it at the head of the active queue, so that these pages
4475 * get re-evaluated by the LRU algorithm first, since they've been
4476 * completely out of it until now.
4477 */
4478 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4479 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4480 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4481 if (vm_page_queue_empty(&vm_page_queue_active)) {
4482 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4483 } else {
4484 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4485 }
4486 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4487 first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4488 last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4489
4490 #if DEBUG
4491 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4492 #endif
4493 vm_page_queue_init(&vm_page_queue_throttled);
4494 /*
4495 * Adjust the global page counts.
4496 */
4497 vm_page_active_count += extra_active_count;
4498 vm_page_pageable_internal_count += extra_internal_count;
4499 vm_page_pageable_external_count += extra_external_count;
4500 vm_page_throttled_count = 0;
4501 }
4502 assert(vm_page_throttled_count == 0);
4503 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4504 vm_page_unlock_queues();
4505 }
4506
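/*
 * Editor's note: the pointer surgery above splices the entire throttled queue
 * onto the head of the active queue in O(1) -- only the head and tail links
 * are rewritten (via VM_PAGE_CONVERT_TO_QUEUE_ENTRY, since the queue links may
 * be stored in packed form) instead of requeueing pages one at a time.
 * vm_page_reactivate_local() below uses the same idiom for the local queues.
 */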
4507
4508 /*
4509 * move pages from the indicated local queue to the global active queue
4510 * it's OK to fail if we're below the hard limit and force == FALSE;
4511 * the nolocks == TRUE case is to allow this function to be run on
4512 * the hibernate path
4513 */
4514
4515 void
4516 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4517 {
4518 struct vpl *lq;
4519 vm_page_t first_local, last_local;
4520 vm_page_t first_active;
4521 vm_page_t m;
4522 uint32_t count = 0;
4523
4524 if (vm_page_local_q == NULL)
4525 return;
4526
4527 lq = &vm_page_local_q[lid].vpl_un.vpl;
4528
4529 if (nolocks == FALSE) {
4530 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4531 if ( !vm_page_trylockspin_queues())
4532 return;
4533 } else
4534 vm_page_lockspin_queues();
4535
4536 VPL_LOCK(&lq->vpl_lock);
4537 }
4538 if (lq->vpl_count) {
4539 /*
4540 * Switch "local" pages to "active".
4541 */
4542 assert(!vm_page_queue_empty(&lq->vpl_queue));
4543
4544 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
4545 VM_PAGE_CHECK(m);
4546 vm_page_check_pageable_safe(m);
4547 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4548 assert(!m->fictitious);
4549
4550 if (m->local_id != lid)
4551 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4552
4553 m->local_id = 0;
4554 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4555 VM_PAGE_CHECK(m);
4556 #if CONFIG_BACKGROUND_QUEUE
4557 if (m->vm_page_in_background)
4558 vm_page_add_to_backgroundq(m, FALSE);
4559 #endif
4560 count++;
4561 }
4562 if (count != lq->vpl_count)
4563 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4564
4565 /*
4566 * Transfer the entire local queue to the regular LRU page queues.
4567 */
4568 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4569 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4570 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4571
4572 if (vm_page_queue_empty(&vm_page_queue_active)) {
4573 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4574 } else {
4575 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4576 }
4577 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4578 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4579 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4580
4581 vm_page_queue_init(&lq->vpl_queue);
4582 /*
4583 * Adjust the global page counts.
4584 */
4585 vm_page_active_count += lq->vpl_count;
4586 vm_page_pageable_internal_count += lq->vpl_internal_count;
4587 vm_page_pageable_external_count += lq->vpl_external_count;
4588 lq->vpl_count = 0;
4589 lq->vpl_internal_count = 0;
4590 lq->vpl_external_count = 0;
4591 }
4592 assert(vm_page_queue_empty(&lq->vpl_queue));
4593
4594 if (nolocks == FALSE) {
4595 VPL_UNLOCK(&lq->vpl_lock);
4596 vm_page_unlock_queues();
4597 }
4598 }
4599
4600 /*
4601 * vm_page_part_zero_fill:
4602 *
4603 * Zero-fill a part of the page.
4604 */
4605 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4606 void
4607 vm_page_part_zero_fill(
4608 vm_page_t m,
4609 vm_offset_t m_pa,
4610 vm_size_t len)
4611 {
4612
4613 #if 0
4614 /*
4615 * we don't hold the page queue lock
4616 * so this check isn't safe to make
4617 */
4618 VM_PAGE_CHECK(m);
4619 #endif
4620
4621 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4622 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4623 #else
4624 vm_page_t tmp;
4625 while (1) {
4626 tmp = vm_page_grab();
4627 if (tmp == VM_PAGE_NULL) {
4628 vm_page_wait(THREAD_UNINT);
4629 continue;
4630 }
4631 break;
4632 }
4633 vm_page_zero_fill(tmp);
4634 if(m_pa != 0) {
4635 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4636 }
4637 if((m_pa + len) < PAGE_SIZE) {
4638 vm_page_part_copy(m, m_pa + len, tmp,
4639 m_pa + len, PAGE_SIZE - (m_pa + len));
4640 }
4641 vm_page_copy(tmp,m);
4642 VM_PAGE_FREE(tmp);
4643 #endif
4644
4645 }
4646
4647 /*
4648 * vm_page_zero_fill:
4649 *
4650 * Zero-fill the specified page.
4651 */
4652 void
4653 vm_page_zero_fill(
4654 vm_page_t m)
4655 {
4656 XPR(XPR_VM_PAGE,
4657 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4658 VM_PAGE_OBJECT(m), m->offset, m, 0,0);
4659 #if 0
4660 /*
4661 * we don't hold the page queue lock
4662 * so this check isn't safe to make
4663 */
4664 VM_PAGE_CHECK(m);
4665 #endif
4666
4667 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4668 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4669 }
4670
4671 /*
4672 * vm_page_part_copy:
4673 *
4674 * copy part of one page to another
4675 */
4676
4677 void
4678 vm_page_part_copy(
4679 vm_page_t src_m,
4680 vm_offset_t src_pa,
4681 vm_page_t dst_m,
4682 vm_offset_t dst_pa,
4683 vm_size_t len)
4684 {
4685 #if 0
4686 /*
4687 * we don't hold the page queue lock
4688 * so this check isn't safe to make
4689 */
4690 VM_PAGE_CHECK(src_m);
4691 VM_PAGE_CHECK(dst_m);
4692 #endif
4693 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4694 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4695 }
4696
4697 /*
4698 * vm_page_copy:
4699 *
4700 * Copy one page to another
4701 */
4702
4703 int vm_page_copy_cs_validations = 0;
4704 int vm_page_copy_cs_tainted = 0;
4705
4706 void
4707 vm_page_copy(
4708 vm_page_t src_m,
4709 vm_page_t dest_m)
4710 {
4711 vm_object_t src_m_object;
4712
4713 src_m_object = VM_PAGE_OBJECT(src_m);
4714
4715 XPR(XPR_VM_PAGE,
4716 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4717 src_m_object, src_m->offset,
4718 VM_PAGE_OBJECT(dest_m), dest_m->offset,
4719 0);
4720 #if 0
4721 /*
4722 * we don't hold the page queue lock
4723 * so this check isn't safe to make
4724 */
4725 VM_PAGE_CHECK(src_m);
4726 VM_PAGE_CHECK(dest_m);
4727 #endif
4728 vm_object_lock_assert_held(src_m_object);
4729
4730 if (src_m_object != VM_OBJECT_NULL &&
4731 src_m_object->code_signed) {
4732 /*
4733 * We're copying a page from a code-signed object.
4734 * Whoever ends up mapping the copy page might care about
4735 * the original page's integrity, so let's validate the
4736 * source page now.
4737 */
4738 vm_page_copy_cs_validations++;
4739 vm_page_validate_cs(src_m);
4740 #if DEVELOPMENT || DEBUG
4741 DTRACE_VM4(codesigned_copy,
4742 vm_object_t, src_m_object,
4743 vm_object_offset_t, src_m->offset,
4744 int, src_m->cs_validated,
4745 int, src_m->cs_tainted);
4746 #endif /* DEVELOPMENT || DEBUG */
4747
4748 }
4749
4750 if (vm_page_is_slideable(src_m)) {
4751 boolean_t was_busy = src_m->busy;
4752 src_m->busy = TRUE;
4753 (void) vm_page_slide(src_m, 0);
4754 assert(src_m->busy);
4755 if (!was_busy) {
4756 PAGE_WAKEUP_DONE(src_m);
4757 }
4758 }
4759
4760 /*
4761 * Propagate the cs_tainted bit to the copy page. Do not propagate
4762 * the cs_validated bit.
4763 */
4764 dest_m->cs_tainted = src_m->cs_tainted;
4765 if (dest_m->cs_tainted) {
4766 vm_page_copy_cs_tainted++;
4767 }
4768 dest_m->slid = src_m->slid;
4769 dest_m->error = src_m->error; /* sliding src_m might have failed... */
4770 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4771 }
4772
4773 #if MACH_ASSERT
4774 static void
4775 _vm_page_print(
4776 vm_page_t p)
4777 {
4778 printf("vm_page %p: \n", p);
4779 printf(" pageq: next=%p prev=%p\n",
4780 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
4781 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
4782 printf(" listq: next=%p prev=%p\n",
4783 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
4784 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
4785 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
4786 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
4787 printf(" wire_count=%u\n", p->wire_count);
4788 printf(" q_state=%u\n", p->vm_page_q_state);
4789
4790 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4791 (p->laundry ? "" : "!"),
4792 (p->reference ? "" : "!"),
4793 (p->gobbled ? "" : "!"),
4794 (p->private ? "" : "!"));
4795 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4796 (p->busy ? "" : "!"),
4797 (p->wanted ? "" : "!"),
4798 (p->tabled ? "" : "!"),
4799 (p->fictitious ? "" : "!"),
4800 (p->pmapped ? "" : "!"),
4801 (p->wpmapped ? "" : "!"));
4802 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4803 (p->free_when_done ? "" : "!"),
4804 (p->absent ? "" : "!"),
4805 (p->error ? "" : "!"),
4806 (p->dirty ? "" : "!"),
4807 (p->cleaning ? "" : "!"),
4808 (p->precious ? "" : "!"),
4809 (p->clustered ? "" : "!"));
4810 printf(" %soverwriting, %srestart, %sunusual\n",
4811 (p->overwriting ? "" : "!"),
4812 (p->restart ? "" : "!"),
4813 (p->unusual ? "" : "!"));
4814 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4815 (p->cs_validated ? "" : "!"),
4816 (p->cs_tainted ? "" : "!"),
4817 (p->cs_nx ? "" : "!"),
4818 (p->no_cache ? "" : "!"));
4819
4820 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
4821 }
4822
4823 /*
4824 * Check that the list of pages is ordered by
4825 * ascending physical address and has no holes.
4826 */
4827 static int
4828 vm_page_verify_contiguous(
4829 vm_page_t pages,
4830 unsigned int npages)
4831 {
4832 vm_page_t m;
4833 unsigned int page_count;
4834 vm_offset_t prev_addr;
4835
4836 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
4837 page_count = 1;
4838 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
4839 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4840 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
4841 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
4842 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
4843 panic("vm_page_verify_contiguous: not contiguous!");
4844 }
4845 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4846 ++page_count;
4847 }
4848 if (page_count != npages) {
4849 printf("pages %p actual count 0x%x but requested 0x%x\n",
4850 pages, page_count, npages);
4851 panic("vm_page_verify_contiguous: count error");
4852 }
4853 return 1;
4854 }
4855
4856
4857 /*
4858 * Check the free lists for proper length etc.
4859 */
4860 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4861 static unsigned int
4862 vm_page_verify_free_list(
4863 vm_page_queue_head_t *vm_page_queue,
4864 unsigned int color,
4865 vm_page_t look_for_page,
4866 boolean_t expect_page)
4867 {
4868 unsigned int npages;
4869 vm_page_t m;
4870 vm_page_t prev_m;
4871 boolean_t found_page;
4872
4873 if (! vm_page_verify_this_free_list_enabled)
4874 return 0;
4875
4876 found_page = FALSE;
4877 npages = 0;
4878 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4879
4880 vm_page_queue_iterate(vm_page_queue,
4881 m,
4882 vm_page_t,
4883 pageq) {
4884
4885 if (m == look_for_page) {
4886 found_page = TRUE;
4887 }
4888 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
4889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4890 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
4891 if ( ! m->busy )
4892 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4893 color, npages, m);
4894 if (color != (unsigned int) -1) {
4895 if (VM_PAGE_GET_COLOR(m) != color)
4896 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4897 color, npages, m, VM_PAGE_GET_COLOR(m), color);
4898 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4899 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4900 color, npages, m, m->vm_page_q_state);
4901 } else {
4902 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4903 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4904 npages, m, m->vm_page_q_state);
4905 }
4906 ++npages;
4907 prev_m = m;
4908 }
4909 if (look_for_page != VM_PAGE_NULL) {
4910 unsigned int other_color;
4911
4912 if (expect_page && !found_page) {
4913 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4914 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4915 _vm_page_print(look_for_page);
4916 for (other_color = 0;
4917 other_color < vm_colors;
4918 other_color++) {
4919 if (other_color == color)
4920 continue;
4921 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
4922 other_color, look_for_page, FALSE);
4923 }
4924 if (color == (unsigned int) -1) {
4925 vm_page_verify_free_list(&vm_lopage_queue_free,
4926 (unsigned int) -1, look_for_page, FALSE);
4927 }
4928 panic("vm_page_verify_free_list(color=%u)\n", color);
4929 }
4930 if (!expect_page && found_page) {
4931 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4932 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4933 }
4934 }
4935 return npages;
4936 }
4937
4938 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4939 static void
4940 vm_page_verify_free_lists( void )
4941 {
4942 unsigned int color, npages, nlopages;
4943 boolean_t toggle = TRUE;
4944
4945 if (! vm_page_verify_all_free_lists_enabled)
4946 return;
4947
4948 npages = 0;
4949
4950 lck_mtx_lock(&vm_page_queue_free_lock);
4951
4952 if (vm_page_verify_this_free_list_enabled == TRUE) {
4953 /*
4954 * This variable has been set globally for extra checking of
4955 * each free list Q. Since we didn't set it, we don't own it
4956 * and we shouldn't toggle it.
4957 */
4958 toggle = FALSE;
4959 }
4960
4961 if (toggle == TRUE) {
4962 vm_page_verify_this_free_list_enabled = TRUE;
4963 }
4964
4965 for( color = 0; color < vm_colors; color++ ) {
4966 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
4967 color, VM_PAGE_NULL, FALSE);
4968 }
4969 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4970 (unsigned int) -1,
4971 VM_PAGE_NULL, FALSE);
4972 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4973 panic("vm_page_verify_free_lists: "
4974 "npages %u free_count %d nlopages %u lo_free_count %u",
4975 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4976
4977 if (toggle == TRUE) {
4978 vm_page_verify_this_free_list_enabled = FALSE;
4979 }
4980
4981 lck_mtx_unlock(&vm_page_queue_free_lock);
4982 }
4983
4984 #endif /* MACH_ASSERT */
4985
4986
4987
4988 #if __arm64__
4989 /*
4990 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
4991 * after the system has 'aged'. To ensure that other allocation requests don't mess
4992 * with the chances of that request being satisfied, we pre-allocate a single contiguous
4993 * 10MB buffer and hand it out to the first request of >= 4MB.
4994 */
4995
4996 kern_return_t cpm_preallocate_early(void);
4997
4998 vm_page_t cpm_preallocated_pages_list = NULL;
4999 boolean_t preallocated_buffer_available = FALSE;
5000
5001 #define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
5002 #define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 *1024) / PAGE_SIZE_64) /* 4 MB */
5003
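/*
 * Editor's note: the two counts above are expressed in pages, so their
 * concrete values depend on PAGE_SIZE_64 -- e.g. 2560 and 1024 pages with
 * 4KB pages, or 640 and 256 pages with 16KB pages.
 */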
5004 kern_return_t
5005 cpm_preallocate_early(void)
5006 {
5007
5008 kern_return_t kr = KERN_SUCCESS;
5009 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5010
5011 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5012
5013 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5014
5015 if (kr != KERN_SUCCESS) {
5016 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5017 } else {
5018 preallocated_buffer_available = TRUE;
5019 }
5020
5021 return kr;
5022 }
5023 #endif /* __arm64__ */
5024
5025
5026 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5027
5028 /*
5029 * CONTIGUOUS PAGE ALLOCATION
5030 *
5031 * Find a region large enough to contain at least n pages
5032 * of contiguous physical memory.
5033 *
5034 * This is done by traversing the vm_page_t array in a linear fashion
5035 * we assume that the vm_page_t array has the available physical pages in an
5036 * ordered, ascending list... this is currently true of all our implementations
5037 * and must remain so... there can be 'holes' in the array... we also can
5038 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
5039 * which used to happen via 'vm_page_convert'... that function was no longer
5040 * being called and was removed...
5041 *
5042 * The basic flow consists of stabilizing some of the interesting state of
5043 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5044 * sweep at the beginning of the array looking for pages that meet our criteria
5045 * for a 'stealable' page... currently we are pretty conservative... if the page
5046 * meets these criteria and is physically contiguous to the previous page in the 'run'
5047 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5048 * and start to develop a new run... if at this point we've already considered
5049 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5050 * and mutex_pause (which will yield the processor), to keep the latency low with
5051 * respect to other threads trying to acquire free pages (or move pages from q to q),
5052 * and then continue from the spot we left off... we only make 1 pass through the
5053 * array. Once we have a 'run' that is long enough, we'll go into the loop
5054 * which steals the pages from the queues they're currently on... pages on the free
5055 * queue can be stolen directly... pages that are on any of the other queues
5056 * must be removed from the object they are tabled on... this requires taking the
5057 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5058 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5059 * dump the pages we've currently stolen back to the free list, and pick up our
5060 * scan from the point where we aborted the 'current' run.
5061 *
5062 *
5063 * Requirements:
5064 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5065 *
5066 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5067 *
5068 * Algorithm:
5069 */
5070
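/*
 * Editor's note -- a condensed, informal outline of the scan described above
 * (a sketch of the code below, not an additional specification):
 *
 *	restart from idx_last_contig_page_found;
 *	for each vm_pages[page_idx]:
 *		if the page is in a transient/unstealable state or breaks
 *		    physical contiguity with the previous page:
 *			RESET_STATE_OF_RUN();
 *		else:
 *			extend the current run (npages++);
 *		if considered > MAX_CONSIDERED_BEFORE_YIELD and no run is
 *		    in progress:
 *			drop both locks, mutex_pause(), retake the locks;
 *	if a run of contig_pages was found:
 *		pass 1: pull the run's free pages off the free queues;
 *		pass 2 (back to front): steal or substitute the in-use pages,
 *			aborting the run (and freeing what was stolen so far)
 *			if an object can't be locked or a page went unstable;
 *	else if we haven't wrapped yet:
 *		wrap to index 0 and scan the array once more;
 */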
5071 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5072
5073
5074 #define RESET_STATE_OF_RUN() \
5075 MACRO_BEGIN \
5076 prevcontaddr = -2; \
5077 start_pnum = -1; \
5078 free_considered = 0; \
5079 substitute_needed = 0; \
5080 npages = 0; \
5081 MACRO_END
5082
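/*
 * Editor's note: prevcontaddr is reset to -2 rather than -1 so that the
 * contiguity test below, "VM_PAGE_GET_PHYS_PAGE(m) == prevcontaddr + 1",
 * can never match right after a reset -- with -1 the test would spuriously
 * match physical page 0.
 */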
5083 /*
5084 * Can we steal in-use (i.e. not free) pages when searching for
5085 * physically-contiguous pages?
5086 */
5087 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5088
5089 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5090 #if DEBUG
5091 int vm_page_find_contig_debug = 0;
5092 #endif
5093
5094 static vm_page_t
5095 vm_page_find_contiguous(
5096 unsigned int contig_pages,
5097 ppnum_t max_pnum,
5098 ppnum_t pnum_mask,
5099 boolean_t wire,
5100 int flags)
5101 {
5102 vm_page_t m = NULL;
5103 ppnum_t prevcontaddr = 0;
5104 ppnum_t start_pnum = 0;
5105 unsigned int npages = 0, considered = 0, scanned = 0;
5106 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5107 unsigned int idx_last_contig_page_found = 0;
5108 int free_considered = 0, free_available = 0;
5109 int substitute_needed = 0;
5110 boolean_t wrapped, zone_gc_called = FALSE;
5111 kern_return_t kr;
5112 #if DEBUG
5113 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5114 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5115 #endif
5116
5117 int yielded = 0;
5118 int dumped_run = 0;
5119 int stolen_pages = 0;
5120 int compressed_pages = 0;
5121
5122
5123 if (contig_pages == 0)
5124 return VM_PAGE_NULL;
5125
5126 full_scan_again:
5127
5128 #if MACH_ASSERT
5129 vm_page_verify_free_lists();
5130 #endif
5131 #if DEBUG
5132 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5133 #endif
5134 PAGE_REPLACEMENT_ALLOWED(TRUE);
5135
5136 vm_page_lock_queues();
5137
5138 #if __arm64__
5139 if (preallocated_buffer_available) {
5140
5141 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5142
5143 m = cpm_preallocated_pages_list;
5144
5145 start_idx = (unsigned int) (m - &vm_pages[0]);
5146
5147 if (wire == FALSE) {
5148
5149 last_idx = start_idx;
5150
5151 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5152
5153 assert(vm_pages[last_idx].gobbled == FALSE);
5154
5155 vm_pages[last_idx].gobbled = TRUE;
5156 vm_page_gobble_count++;
5157
5158 assert(1 == vm_pages[last_idx].wire_count);
5159 /*
5160 * Gobbled pages are counted as wired pages, so there is no need to drop
5161 * the global wired page count; adjusting just this page's wire_count is enough.
5162 */
5163 vm_pages[last_idx].wire_count--;
5164 vm_pages[last_idx].vm_page_q_state = VM_PAGE_NOT_ON_Q;
5165 }
5166
5167 }
5168
5169 last_idx = start_idx + contig_pages - 1;
5170
5171 vm_pages[last_idx].snext = NULL;
5172
5173 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5174
5175 last_idx += 1;
5176 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5177
5178 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5179 vm_page_free(&vm_pages[last_idx]);
5180 }
5181
5182 cpm_preallocated_pages_list = NULL;
5183 preallocated_buffer_available = FALSE;
5184
5185 goto done_scanning;
5186 }
5187 }
5188 #endif /* __arm64__ */
5189
5190 lck_mtx_lock(&vm_page_queue_free_lock);
5191
5192 RESET_STATE_OF_RUN();
5193
5194 scanned = 0;
5195 considered = 0;
5196 free_available = vm_page_free_count - vm_page_free_reserved;
5197
5198 wrapped = FALSE;
5199
5200 if(flags & KMA_LOMEM)
5201 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5202 else
5203 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5204
5205 orig_last_idx = idx_last_contig_page_found;
5206 last_idx = orig_last_idx;
5207
5208 for (page_idx = last_idx, start_idx = last_idx;
5209 npages < contig_pages && page_idx < vm_pages_count;
5210 page_idx++) {
5211 retry:
5212 if (wrapped &&
5213 npages == 0 &&
5214 page_idx >= orig_last_idx) {
5215 /*
5216 * We're back where we started and we haven't
5217 * found any suitable contiguous range. Let's
5218 * give up.
5219 */
5220 break;
5221 }
5222 scanned++;
5223 m = &vm_pages[page_idx];
5224
5225 assert(!m->fictitious);
5226 assert(!m->private);
5227
5228 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5229 /* no more low pages... */
5230 break;
5231 }
5232 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5233 /*
5234 * not aligned
5235 */
5236 RESET_STATE_OF_RUN();
5237
5238 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
5239 m->laundry || m->wanted ||
5240 m->cleaning || m->overwriting || m->free_when_done) {
5241 /*
5242 * page is in a transient state
5243 * or a state we don't want to deal
5244 * with, so don't consider it which
5245 * means starting a new run
5246 */
5247 RESET_STATE_OF_RUN();
5248
5249 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
5250 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5251 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5252 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5253 /*
5254 * page needs to be on one of our queues (other than the pageout or special free queues)
5255 * or it needs to belong to the compressor pool (which is now indicated
5256 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5257 * from the check for VM_PAGE_NOT_ON_Q)
5258 * in order for it to be stable behind the
5259 * locks we hold at this point...
5260 * if not, don't consider it which
5261 * means starting a new run
5262 */
5263 RESET_STATE_OF_RUN();
5264
5265 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
5266 /*
5267 * pages on the free list are always 'busy'
5268 * so we couldn't test for 'busy' in the check
5269 * for the transient states... pages that are
5270 * 'free' are never 'tabled', so we also couldn't
5271 * test for 'tabled'. So we check here to make
5272 * sure that a non-free page is not busy and is
5273 * tabled on an object...
5274 * if not, don't consider it which
5275 * means starting a new run
5276 */
5277 RESET_STATE_OF_RUN();
5278
5279 } else {
5280 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5281 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5282 RESET_STATE_OF_RUN();
5283 goto did_consider;
5284 } else {
5285 npages = 1;
5286 start_idx = page_idx;
5287 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5288 }
5289 } else {
5290 npages++;
5291 }
5292 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5293
5294 VM_PAGE_CHECK(m);
5295 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5296 free_considered++;
5297 } else {
5298 /*
5299 * This page is not free.
5300 * If we can't steal used pages,
5301 * we have to give up this run
5302 * and keep looking.
5303 * Otherwise, we might need to
5304 * move the contents of this page
5305 * into a substitute page.
5306 */
5307 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5308 if (m->pmapped || m->dirty || m->precious) {
5309 substitute_needed++;
5310 }
5311 #else
5312 RESET_STATE_OF_RUN();
5313 #endif
5314 }
5315
5316 if ((free_considered + substitute_needed) > free_available) {
5317 /*
5318 * if we let this run continue
5319 * we will end up dropping the vm_page_free_count
5320 * below the reserve limit... we need to abort
5321 * this run, but we can at least re-consider this
5322 * page... thus the jump back to 'retry'
5323 */
5324 RESET_STATE_OF_RUN();
5325
5326 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5327 considered++;
5328 goto retry;
5329 }
5330 /*
5331 * free_available == 0
5332 * so can't consider any free pages... if
5333 * we went to retry in this case, we'd
5334 * get stuck looking at the same page
5335 * w/o making any forward progress
5336 * we also want to take this path if we've already
5337 * reached our limit that controls the lock latency
5338 */
5339 }
5340 }
5341 did_consider:
5342 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5343
5344 PAGE_REPLACEMENT_ALLOWED(FALSE);
5345
5346 lck_mtx_unlock(&vm_page_queue_free_lock);
5347 vm_page_unlock_queues();
5348
5349 mutex_pause(0);
5350
5351 PAGE_REPLACEMENT_ALLOWED(TRUE);
5352
5353 vm_page_lock_queues();
5354 lck_mtx_lock(&vm_page_queue_free_lock);
5355
5356 RESET_STATE_OF_RUN();
5357 /*
5358 * reset our free page limit since we
5359 * dropped the lock protecting the vm_page_free_queue
5360 */
5361 free_available = vm_page_free_count - vm_page_free_reserved;
5362 considered = 0;
5363
5364 yielded++;
5365
5366 goto retry;
5367 }
5368 considered++;
5369 }
5370 m = VM_PAGE_NULL;
5371
5372 if (npages != contig_pages) {
5373 if (!wrapped) {
5374 /*
5375 * We didn't find a contiguous range but we didn't
5376 * start from the very first page.
5377 * Start again from the very first page.
5378 */
5379 RESET_STATE_OF_RUN();
5380 if( flags & KMA_LOMEM)
5381 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5382 else
5383 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5384 last_idx = 0;
5385 page_idx = last_idx;
5386 wrapped = TRUE;
5387 goto retry;
5388 }
5389 lck_mtx_unlock(&vm_page_queue_free_lock);
5390 } else {
5391 vm_page_t m1;
5392 vm_page_t m2;
5393 unsigned int cur_idx;
5394 unsigned int tmp_start_idx;
5395 vm_object_t locked_object = VM_OBJECT_NULL;
5396 boolean_t abort_run = FALSE;
5397
5398 assert(page_idx - start_idx == contig_pages);
5399
5400 tmp_start_idx = start_idx;
5401
5402 /*
5403 * first pass through to pull the free pages
5404 * off of the free queue so that in case we
5405 * need substitute pages, we won't grab any
5406 * of the free pages in the run... we'll clear
5407 * the 'free' bit in the 2nd pass, and even in
5408 * an abort_run case, we'll collect all of the
5409 * free pages in this run and return them to the free list
5410 */
5411 while (start_idx < page_idx) {
5412
5413 m1 = &vm_pages[start_idx++];
5414
5415 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5416 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
5417 #endif
5418
5419 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5420 unsigned int color;
5421
5422 color = VM_PAGE_GET_COLOR(m1);
5423 #if MACH_ASSERT
5424 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5425 #endif
5426 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5427 m1,
5428 vm_page_t,
5429 pageq);
5430
5431 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5432 #if MACH_ASSERT
5433 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5434 #endif
5435 /*
5436 * Clear the "free" bit so that this page
5437 * does not get considered for another
5438 * concurrent physically-contiguous allocation.
5439 */
5440 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
5441 assert(m1->busy);
5442
5443 vm_page_free_count--;
5444 }
5445 }
5446 if( flags & KMA_LOMEM)
5447 vm_page_lomem_find_contiguous_last_idx = page_idx;
5448 else
5449 vm_page_find_contiguous_last_idx = page_idx;
5450
5451 /*
5452 * we can drop the free queue lock at this point since
5453 * we've pulled any 'free' candidates off of the list
5454 * we need it dropped so that we can do a vm_page_grab
5455 * when substituting for pmapped/dirty pages
5456 */
5457 lck_mtx_unlock(&vm_page_queue_free_lock);
5458
5459 start_idx = tmp_start_idx;
5460 cur_idx = page_idx - 1;
5461
5462 while (start_idx++ < page_idx) {
5463 /*
5464 * must go through the list from back to front
5465 * so that the page list is created in the
5466 * correct order - low -> high phys addresses
5467 */
5468 m1 = &vm_pages[cur_idx--];
5469
5470 if (m1->vm_page_object == 0) {
5471 /*
5472 * page has already been removed from
5473 * the free list in the 1st pass
5474 */
5475 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5476 assert(m1->offset == (vm_object_offset_t) -1);
5477 assert(m1->busy);
5478 assert(!m1->wanted);
5479 assert(!m1->laundry);
5480 } else {
5481 vm_object_t object;
5482 int refmod;
5483 boolean_t disconnected, reusable;
5484
5485 if (abort_run == TRUE)
5486 continue;
5487
5488 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5489
5490 object = VM_PAGE_OBJECT(m1);
5491
5492 if (object != locked_object) {
5493 if (locked_object) {
5494 vm_object_unlock(locked_object);
5495 locked_object = VM_OBJECT_NULL;
5496 }
5497 if (vm_object_lock_try(object))
5498 locked_object = object;
5499 }
5500 if (locked_object == VM_OBJECT_NULL ||
5501 (VM_PAGE_WIRED(m1) || m1->gobbled ||
5502 m1->laundry || m1->wanted ||
5503 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5504 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5505
5506 if (locked_object) {
5507 vm_object_unlock(locked_object);
5508 locked_object = VM_OBJECT_NULL;
5509 }
5510 tmp_start_idx = cur_idx;
5511 abort_run = TRUE;
5512 continue;
5513 }
5514
5515 disconnected = FALSE;
5516 reusable = FALSE;
5517
5518 if ((m1->reusable ||
5519 object->all_reusable) &&
5520 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5521 !m1->dirty &&
5522 !m1->reference) {
5523 /* reusable page... */
5524 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5525 disconnected = TRUE;
5526 if (refmod == 0) {
5527 /*
5528 * ... not reused: can steal
5529 * without relocating contents.
5530 */
5531 reusable = TRUE;
5532 }
5533 }
5534
5535 if ((m1->pmapped &&
5536 ! reusable) ||
5537 m1->dirty ||
5538 m1->precious) {
5539 vm_object_offset_t offset;
5540
5541 m2 = vm_page_grab();
5542
5543 if (m2 == VM_PAGE_NULL) {
5544 if (locked_object) {
5545 vm_object_unlock(locked_object);
5546 locked_object = VM_OBJECT_NULL;
5547 }
5548 tmp_start_idx = cur_idx;
5549 abort_run = TRUE;
5550 continue;
5551 }
5552 if (! disconnected) {
5553 if (m1->pmapped)
5554 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5555 else
5556 refmod = 0;
5557 }
5558
5559 /* copy the page's contents */
5560 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5561 /* copy the page's state */
5562 assert(!VM_PAGE_WIRED(m1));
5563 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5564 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
5565 assert(!m1->laundry);
5566 m2->reference = m1->reference;
5567 assert(!m1->gobbled);
5568 assert(!m1->private);
5569 m2->no_cache = m1->no_cache;
5570 m2->xpmapped = 0;
5571 assert(!m1->busy);
5572 assert(!m1->wanted);
5573 assert(!m1->fictitious);
5574 m2->pmapped = m1->pmapped; /* should flush cache ? */
5575 m2->wpmapped = m1->wpmapped;
5576 assert(!m1->free_when_done);
5577 m2->absent = m1->absent;
5578 m2->error = m1->error;
5579 m2->dirty = m1->dirty;
5580 assert(!m1->cleaning);
5581 m2->precious = m1->precious;
5582 m2->clustered = m1->clustered;
5583 assert(!m1->overwriting);
5584 m2->restart = m1->restart;
5585 m2->unusual = m1->unusual;
5586 m2->cs_validated = m1->cs_validated;
5587 m2->cs_tainted = m1->cs_tainted;
5588 m2->cs_nx = m1->cs_nx;
5589
5590 /*
5591 * If m1 had really been reusable,
5592 * we would have just stolen it, so
5593 * let's not propagate its "reusable"
5594 * bit and assert that m2 is not
5595 * marked as "reusable".
5596 */
5597 // m2->reusable = m1->reusable;
5598 assert(!m2->reusable);
5599
5600 // assert(!m1->lopage);
5601 m2->slid = m1->slid;
5602
5603 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5604 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5605
5606 /*
5607 * page may need to be flushed if
5608 * it is marshalled into a UPL
5609 * that is going to be used by a device
5610 * that doesn't support coherency
5611 */
5612 m2->written_by_kernel = TRUE;
5613
5614 /*
5615 * make sure we clear the ref/mod state
5616 * from the pmap layer... else we risk
5617 * inheriting state from the last time
5618 * this page was used...
5619 */
5620 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5621
5622 if (refmod & VM_MEM_REFERENCED)
5623 m2->reference = TRUE;
5624 if (refmod & VM_MEM_MODIFIED) {
5625 SET_PAGE_DIRTY(m2, TRUE);
5626 }
5627 offset = m1->offset;
5628
5629 /*
5630 * completely cleans up the state
5631 * of the page so that it is ready
5632 * to be put onto the free list... for
5633 * our purpose here it now looks like it
5634 * just came off of the free list
5635 */
5636 vm_page_free_prepare(m1);
5637
5638 /*
5639 * now put the substitute page
5640 * on the object
5641 */
5642 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5643
5644 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5645 m2->pmapped = TRUE;
5646 m2->wpmapped = TRUE;
5647
5648 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5649 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5650
5651 assert(kr == KERN_SUCCESS);
5652
5653 compressed_pages++;
5654
5655 } else {
5656 if (m2->reference)
5657 vm_page_activate(m2);
5658 else
5659 vm_page_deactivate(m2);
5660 }
5661 PAGE_WAKEUP_DONE(m2);
5662
5663 } else {
5664 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5665
5666 /*
5667 * completely cleans up the state
5668 * of the page so that it is ready
5669 * to be put onto the free list... for
5670 * our purpose here it now looks like it
5671 * just came off of the free list
5672 */
5673 vm_page_free_prepare(m1);
5674 }
5675
5676 stolen_pages++;
5677
5678 }
5679 #if CONFIG_BACKGROUND_QUEUE
5680 vm_page_assign_background_state(m1);
5681 #endif
5682 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5683 m1->snext = m;
5684 m = m1;
5685 }
5686 if (locked_object) {
5687 vm_object_unlock(locked_object);
5688 locked_object = VM_OBJECT_NULL;
5689 }
5690
5691 if (abort_run == TRUE) {
5692 /*
5693 * want the index of the last
5694 * page in this run that was
5695 * successfully 'stolen', so back
5696 * it up 1 for the auto-decrement on use
5697 * and 1 more to bump back over this page
5698 */
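			/*
			 * concretely: the 2nd pass fetched each page with
			 * "m1 = &vm_pages[cur_idx--]", so if we bailed on, say,
			 * vm_pages[37], cur_idx had already dropped to 36 when it
			 * was stashed in tmp_start_idx... tmp_start_idx + 2 == 38
			 * then restarts the scan on the first page past the one
			 * we gave up on
			 */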
5699 page_idx = tmp_start_idx + 2;
5700 if (page_idx >= vm_pages_count) {
5701 if (wrapped) {
5702 if (m != VM_PAGE_NULL) {
5703 vm_page_unlock_queues();
5704 vm_page_free_list(m, FALSE);
5705 vm_page_lock_queues();
5706 m = VM_PAGE_NULL;
5707 }
5708 dumped_run++;
5709 goto done_scanning;
5710 }
5711 page_idx = last_idx = 0;
5712 wrapped = TRUE;
5713 }
5714 abort_run = FALSE;
5715
5716 /*
5717 * We didn't find a contiguous range but we didn't
5718 * start from the very first page.
5719 * Start again from the very first page.
5720 */
5721 RESET_STATE_OF_RUN();
5722
5723 if( flags & KMA_LOMEM)
5724 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5725 else
5726 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5727
5728 last_idx = page_idx;
5729
5730 if (m != VM_PAGE_NULL) {
5731 vm_page_unlock_queues();
5732 vm_page_free_list(m, FALSE);
5733 vm_page_lock_queues();
5734 m = VM_PAGE_NULL;
5735 }
5736 dumped_run++;
5737
5738 lck_mtx_lock(&vm_page_queue_free_lock);
5739 /*
5740 * reset our free page limit since we
5741 * dropped the lock protecting the vm_page_free_queue
5742 */
5743 free_available = vm_page_free_count - vm_page_free_reserved;
5744 goto retry;
5745 }
5746
5747 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5748
5749 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5750 assert(m1->wire_count == 0);
5751
5752 if (wire == TRUE) {
5753 m1->wire_count++;
5754 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5755 } else
5756 m1->gobbled = TRUE;
5757 }
5758 if (wire == FALSE)
5759 vm_page_gobble_count += npages;
5760
5761 /*
5762 * gobbled pages are also counted as wired pages
5763 */
5764 vm_page_wire_count += npages;
5765
5766 assert(vm_page_verify_contiguous(m, npages));
5767 }
5768 done_scanning:
5769 PAGE_REPLACEMENT_ALLOWED(FALSE);
5770
5771 vm_page_unlock_queues();
5772
5773 #if DEBUG
5774 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5775
5776 tv_end_sec -= tv_start_sec;
5777 if (tv_end_usec < tv_start_usec) {
5778 tv_end_sec--;
5779 tv_end_usec += 1000000;
5780 }
5781 tv_end_usec -= tv_start_usec;
5782 if (tv_end_usec >= 1000000) {
5783 tv_end_sec++;
5784 tv_end_usec -= 1000000;
5785 }
5786 if (vm_page_find_contig_debug) {
5787 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5788 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5789 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5790 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5791 }
5792
5793 #endif
5794 #if MACH_ASSERT
5795 vm_page_verify_free_lists();
5796 #endif
5797 if (m == NULL && zone_gc_called == FALSE) {
5798 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5799 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5800 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5801
5802 if (consider_buffer_cache_collect != NULL) {
5803 (void)(*consider_buffer_cache_collect)(1);
5804 }
5805
5806 consider_zone_gc(FALSE);
5807
5808 zone_gc_called = TRUE;
5809
5810 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5811 goto full_scan_again;
5812 }
5813
5814 return m;
5815 }
5816
5817 /*
5818 * Allocate a list of contiguous, wired pages.
5819 */
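/*
 * A minimal usage sketch (not an actual call site in this file; the zero
 * values for max_pnum and pnum_mask are assumed here to mean "no physical
 * placement restriction"):
 *
 *	vm_page_t	pages;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(4 * PAGE_SIZE, &pages, 0, 0, TRUE, 0);
 *	if (kr == KERN_SUCCESS) {
 *		// "pages" is linked through snext in ascending physical
 *		// address order and can be walked with NEXT_PAGE()
 *	}
 */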
5820 kern_return_t
5821 cpm_allocate(
5822 vm_size_t size,
5823 vm_page_t *list,
5824 ppnum_t max_pnum,
5825 ppnum_t pnum_mask,
5826 boolean_t wire,
5827 int flags)
5828 {
5829 vm_page_t pages;
5830 unsigned int npages;
5831
5832 if (size % PAGE_SIZE != 0)
5833 return KERN_INVALID_ARGUMENT;
5834
5835 npages = (unsigned int) (size / PAGE_SIZE);
5836 if (npages != size / PAGE_SIZE) {
5837 /* 32-bit overflow */
5838 return KERN_INVALID_ARGUMENT;
5839 }
5840
5841 /*
5842 * Obtain a pointer to a subset of the free
5843 * list large enough to satisfy the request;
5844 * the region will be physically contiguous.
5845 */
5846 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
5847
5848 if (pages == VM_PAGE_NULL)
5849 return KERN_NO_SPACE;
5850 /*
5851 * determine need for wakeups
5852 */
5853 if ((vm_page_free_count < vm_page_free_min) ||
5854 ((vm_page_free_count < vm_page_free_target) &&
5855 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5856 thread_wakeup((event_t) &vm_page_free_wanted);
5857
5858 VM_CHECK_MEMORYSTATUS;
5859
5860 /*
5861 * The CPM pages should now be available and
5862 * ordered by ascending physical address.
5863 */
5864 assert(vm_page_verify_contiguous(pages, npages));
5865
5866 *list = pages;
5867 return KERN_SUCCESS;
5868 }
5869
5870
5871 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5872
5873 /*
5874 * when working on a 'run' of pages, it is necessary to hold
5875 * the vm_page_queue_lock (a hot global lock) for certain operations
5876 * on the page... however, the majority of the work can be done
5877 * while merely holding the object lock... in fact there are certain
5878 * collections of pages that don't require any work brokered by the
5879 * vm_page_queue_lock... to mitigate the time spent behind the global
5880 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5881 * while doing all of the work that doesn't require the vm_page_queue_lock...
5882 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5883 * necessary work for each page... we will grab the busy bit on the page
5884 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5885 * if it can't immediately take the vm_page_queue_lock in order to compete
5886 * for the locks in the same order that vm_pageout_scan takes them.
5887 * the operation names are modeled after the names of the routines that
5888 * would otherwise be called directly, which keeps the deferred changes
5889 * obvious when reading the original loop
5890 */
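/*
 * A sketch of the intended caller pattern (illustrative only; real callers
 * size dw_array and pick dw_mask bits to suit, and DEFAULT_DELAYED_WORK_LIMIT
 * is used here just as a plausible batch size):
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *
 *	// with the object locked, but NOT the vm_page_queue_lock, for each page m:
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
 *	dwp++;
 *	dw_count++;
 *	if (dw_count >= vm_max_delayed_work_limit) {
 *		vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 */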
5891
5892 void
5893 vm_page_do_delayed_work(
5894 vm_object_t object,
5895 vm_tag_t tag,
5896 struct vm_page_delayed_work *dwp,
5897 int dw_count)
5898 {
5899 int j;
5900 vm_page_t m;
5901 vm_page_t local_free_q = VM_PAGE_NULL;
5902
5903 /*
5904 * pageout_scan takes the vm_page_lock_queues first
5905 * then tries for the object lock... to avoid what
5906 * is effectively a lock inversion, we'll go to the
5907 * trouble of taking them in that same order... otherwise
5908 * if this object contains the majority of the pages resident
5909 * in the UBC (or a small set of large objects actively being
5910 * worked on contain the majority of the pages), we could
5911 * cause the pageout_scan thread to 'starve' in its attempt
5912 * to find pages to move to the free queue, since it has to
5913 * successfully acquire the object lock of any candidate page
5914 * before it can steal/clean it.
5915 */
5916 if (!vm_page_trylockspin_queues()) {
5917 vm_object_unlock(object);
5918
5919 vm_page_lockspin_queues();
5920
5921 for (j = 0; ; j++) {
5922 if (!vm_object_lock_avoid(object) &&
5923 _vm_object_lock_try(object))
5924 break;
5925 vm_page_unlock_queues();
5926 mutex_pause(j);
5927 vm_page_lockspin_queues();
5928 }
5929 }
5930 for (j = 0; j < dw_count; j++, dwp++) {
5931
5932 m = dwp->dw_m;
5933
5934 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
5935 vm_pageout_throttle_up(m);
5936 #if CONFIG_PHANTOM_CACHE
5937 if (dwp->dw_mask & DW_vm_phantom_cache_update)
5938 vm_phantom_cache_update(m);
5939 #endif
5940 if (dwp->dw_mask & DW_vm_page_wire)
5941 vm_page_wire(m, tag, FALSE);
5942 else if (dwp->dw_mask & DW_vm_page_unwire) {
5943 boolean_t queueit;
5944
5945 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
5946
5947 vm_page_unwire(m, queueit);
5948 }
5949 if (dwp->dw_mask & DW_vm_page_free) {
5950 vm_page_free_prepare_queues(m);
5951
5952 assert(m->pageq.next == 0 && m->pageq.prev == 0);
5953 /*
5954 * Add this page to our list of reclaimed pages,
5955 * to be freed later.
5956 */
5957 m->snext = local_free_q;
5958 local_free_q = m;
5959 } else {
5960 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5961 vm_page_deactivate_internal(m, FALSE);
5962 else if (dwp->dw_mask & DW_vm_page_activate) {
5963 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
5964 vm_page_activate(m);
5965 }
5966 }
5967 else if (dwp->dw_mask & DW_vm_page_speculate)
5968 vm_page_speculate(m, TRUE);
5969 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5970 /*
5971 * if we didn't hold the object lock and did this,
5972 * we might disconnect the page, then someone might
5973 * soft fault it back in, then we would put it on the
5974 * cleaned queue, and so we would have a referenced (maybe even dirty)
5975 * page on that queue, which we don't want
5976 */
5977 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5978
5979 if ((refmod_state & VM_MEM_REFERENCED)) {
5980 /*
5981 * this page has been touched since it got cleaned; let's activate it
5982 * if it hasn't already been
5983 */
5984 vm_pageout_enqueued_cleaned++;
5985 vm_pageout_cleaned_reactivated++;
5986 vm_pageout_cleaned_commit_reactivated++;
5987
5988 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
5989 vm_page_activate(m);
5990 } else {
5991 m->reference = FALSE;
5992 vm_page_enqueue_cleaned(m);
5993 }
5994 }
5995 else if (dwp->dw_mask & DW_vm_page_lru)
5996 vm_page_lru(m);
5997 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5998 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
5999 vm_page_queues_remove(m, TRUE);
6000 }
6001 if (dwp->dw_mask & DW_set_reference)
6002 m->reference = TRUE;
6003 else if (dwp->dw_mask & DW_clear_reference)
6004 m->reference = FALSE;
6005
6006 if (dwp->dw_mask & DW_move_page) {
6007 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6008 vm_page_queues_remove(m, FALSE);
6009
6010 assert(VM_PAGE_OBJECT(m) != kernel_object);
6011
6012 vm_page_enqueue_inactive(m, FALSE);
6013 }
6014 }
6015 if (dwp->dw_mask & DW_clear_busy)
6016 m->busy = FALSE;
6017
6018 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6019 PAGE_WAKEUP(m);
6020 }
6021 }
6022 vm_page_unlock_queues();
6023
6024 if (local_free_q)
6025 vm_page_free_list(local_free_q, TRUE);
6026
6027 VM_CHECK_MEMORYSTATUS;
6028
6029 }
6030
6031 kern_return_t
6032 vm_page_alloc_list(
6033 int page_count,
6034 int flags,
6035 vm_page_t *list)
6036 {
6037 vm_page_t lo_page_list = VM_PAGE_NULL;
6038 vm_page_t mem;
6039 int i;
6040
6041 if ( !(flags & KMA_LOMEM))
6042 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6043
6044 for (i = 0; i < page_count; i++) {
6045
6046 mem = vm_page_grablo();
6047
6048 if (mem == VM_PAGE_NULL) {
6049 if (lo_page_list)
6050 vm_page_free_list(lo_page_list, FALSE);
6051
6052 *list = VM_PAGE_NULL;
6053
6054 return (KERN_RESOURCE_SHORTAGE);
6055 }
6056 mem->snext = lo_page_list;
6057 lo_page_list = mem;
6058 }
6059 *list = lo_page_list;
6060
6061 return (KERN_SUCCESS);
6062 }
6063
6064 void
6065 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6066 {
6067 page->offset = offset;
6068 }
6069
6070 vm_page_t
6071 vm_page_get_next(vm_page_t page)
6072 {
6073 return (page->snext);
6074 }
6075
6076 vm_object_offset_t
6077 vm_page_get_offset(vm_page_t page)
6078 {
6079 return (page->offset);
6080 }
6081
6082 ppnum_t
6083 vm_page_get_phys_page(vm_page_t page)
6084 {
6085 return (VM_PAGE_GET_PHYS_PAGE(page));
6086 }
6087
6088
6089 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6090
6091 #if HIBERNATION
6092
6093 static vm_page_t hibernate_gobble_queue;
6094
6095 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6096 static int hibernate_flush_dirty_pages(int);
6097 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6098
6099 void hibernate_flush_wait(void);
6100 void hibernate_mark_in_progress(void);
6101 void hibernate_clear_in_progress(void);
6102
6103 void hibernate_free_range(int, int);
6104 void hibernate_hash_insert_page(vm_page_t);
6105 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6106 void hibernate_rebuild_vm_structs(void);
6107 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6108 ppnum_t hibernate_lookup_paddr(unsigned int);
6109
6110 struct hibernate_statistics {
6111 int hibernate_considered;
6112 int hibernate_reentered_on_q;
6113 int hibernate_found_dirty;
6114 int hibernate_skipped_cleaning;
6115 int hibernate_skipped_transient;
6116 int hibernate_skipped_precious;
6117 int hibernate_skipped_external;
6118 int hibernate_queue_nolock;
6119 int hibernate_queue_paused;
6120 int hibernate_throttled;
6121 int hibernate_throttle_timeout;
6122 int hibernate_drained;
6123 int hibernate_drain_timeout;
6124 int cd_lock_failed;
6125 int cd_found_precious;
6126 int cd_found_wired;
6127 int cd_found_busy;
6128 int cd_found_unusual;
6129 int cd_found_cleaning;
6130 int cd_found_laundry;
6131 int cd_found_dirty;
6132 int cd_found_xpmapped;
6133 int cd_skipped_xpmapped;
6134 int cd_local_free;
6135 int cd_total_free;
6136 int cd_vm_page_wire_count;
6137 int cd_vm_struct_pages_unneeded;
6138 int cd_pages;
6139 int cd_discarded;
6140 int cd_count_wire;
6141 } hibernate_stats;
6142
6143
6144 /*
6145 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6146 * so that we don't overrun the estimated image size, which would
6147 * result in a hibernation failure.
6148 */
6149 #define HIBERNATE_XPMAPPED_LIMIT 40000
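/* at a 4K page size, 40000 pages is roughly 156 MB of uncompressed page data */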
6150
6151
6152 static int
6153 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6154 {
6155 wait_result_t wait_result;
6156
6157 vm_page_lock_queues();
6158
6159 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6160
6161 q->pgo_draining = TRUE;
6162
6163 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6164
6165 vm_page_unlock_queues();
6166
6167 wait_result = thread_block(THREAD_CONTINUE_NULL);
6168
6169 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6170 hibernate_stats.hibernate_drain_timeout++;
6171
6172 if (q == &vm_pageout_queue_external)
6173 return (0);
6174
6175 return (1);
6176 }
6177 vm_page_lock_queues();
6178
6179 hibernate_stats.hibernate_drained++;
6180 }
6181 vm_page_unlock_queues();
6182
6183 return (0);
6184 }
6185
6186
6187 boolean_t hibernate_skip_external = FALSE;
6188
6189 static int
6190 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6191 {
6192 vm_page_t m;
6193 vm_object_t l_object = NULL;
6194 vm_object_t m_object = NULL;
6195 int refmod_state = 0;
6196 int try_failed_count = 0;
6197 int retval = 0;
6198 int current_run = 0;
6199 struct vm_pageout_queue *iq;
6200 struct vm_pageout_queue *eq;
6201 struct vm_pageout_queue *tq;
6202
6203 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6204 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6205
6206 iq = &vm_pageout_queue_internal;
6207 eq = &vm_pageout_queue_external;
6208
6209 vm_page_lock_queues();
6210
6211 while (qcount && !vm_page_queue_empty(q)) {
6212
6213 if (current_run++ == 1000) {
6214 if (hibernate_should_abort()) {
6215 retval = 1;
6216 break;
6217 }
6218 current_run = 0;
6219 }
6220
6221 m = (vm_page_t) vm_page_queue_first(q);
6222 m_object = VM_PAGE_OBJECT(m);
6223
6224 /*
6225 * check to see if we currently are working
6226 * with the same object... if so, we've
6227 * already got the lock
6228 */
6229 if (m_object != l_object) {
6230 /*
6231 * the object associated with candidate page is
6232 * different from the one we were just working
6233 * with... dump the lock if we still own it
6234 */
6235 if (l_object != NULL) {
6236 vm_object_unlock(l_object);
6237 l_object = NULL;
6238 }
6239 /*
6240 * Try to lock object; since we've already got the
6241 * page queues lock, we can only 'try' for this one.
6242 * if the 'try' fails, we need to do a mutex_pause
6243 * to allow the owner of the object lock a chance to
6244 * run...
6245 */
6246 if ( !vm_object_lock_try_scan(m_object)) {
6247
6248 if (try_failed_count > 20) {
6249 hibernate_stats.hibernate_queue_nolock++;
6250
6251 goto reenter_pg_on_q;
6252 }
6253
6254 vm_page_unlock_queues();
6255 mutex_pause(try_failed_count++);
6256 vm_page_lock_queues();
6257
6258 hibernate_stats.hibernate_queue_paused++;
6259 continue;
6260 } else {
6261 l_object = m_object;
6262 }
6263 }
6264 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
6265 /*
6266 * page is not to be cleaned
6267 * put it back on its queue (at the tail)
6268 */
6269 if (m->cleaning)
6270 hibernate_stats.hibernate_skipped_cleaning++;
6271 else
6272 hibernate_stats.hibernate_skipped_transient++;
6273
6274 goto reenter_pg_on_q;
6275 }
6276 if (m_object->copy == VM_OBJECT_NULL) {
6277 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6278 /*
6279 * let the normal hibernate image path
6280 * deal with these
6281 */
6282 goto reenter_pg_on_q;
6283 }
6284 }
6285 if ( !m->dirty && m->pmapped) {
6286 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6287
6288 if ((refmod_state & VM_MEM_MODIFIED)) {
6289 SET_PAGE_DIRTY(m, FALSE);
6290 }
6291 } else
6292 refmod_state = 0;
6293
6294 if ( !m->dirty) {
6295 /*
6296 * page is not to be cleaned
6297 * put it back on its queue (at the tail)
6298 */
6299 if (m->precious)
6300 hibernate_stats.hibernate_skipped_precious++;
6301
6302 goto reenter_pg_on_q;
6303 }
6304
6305 if (hibernate_skip_external == TRUE && !m_object->internal) {
6306
6307 hibernate_stats.hibernate_skipped_external++;
6308
6309 goto reenter_pg_on_q;
6310 }
6311 tq = NULL;
6312
6313 if (m_object->internal) {
6314 if (VM_PAGE_Q_THROTTLED(iq))
6315 tq = iq;
6316 } else if (VM_PAGE_Q_THROTTLED(eq))
6317 tq = eq;
6318
6319 if (tq != NULL) {
6320 wait_result_t wait_result;
6321 int wait_count = 5;
6322
6323 if (l_object != NULL) {
6324 vm_object_unlock(l_object);
6325 l_object = NULL;
6326 }
6327
6328 while (retval == 0) {
6329
6330 tq->pgo_throttled = TRUE;
6331
6332 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6333
6334 vm_page_unlock_queues();
6335
6336 wait_result = thread_block(THREAD_CONTINUE_NULL);
6337
6338 vm_page_lock_queues();
6339
6340 if (wait_result != THREAD_TIMED_OUT)
6341 break;
6342 if (!VM_PAGE_Q_THROTTLED(tq))
6343 break;
6344
6345 if (hibernate_should_abort())
6346 retval = 1;
6347
6348 if (--wait_count == 0) {
6349
6350 hibernate_stats.hibernate_throttle_timeout++;
6351
6352 if (tq == eq) {
6353 hibernate_skip_external = TRUE;
6354 break;
6355 }
6356 retval = 1;
6357 }
6358 }
6359 if (retval)
6360 break;
6361
6362 hibernate_stats.hibernate_throttled++;
6363
6364 continue;
6365 }
6366 /*
6367 * we've already factored out pages in the laundry which
6368 * means this page can't be on the pageout queue so it's
6369 * safe to do the vm_page_queues_remove
6370 */
6371 vm_page_queues_remove(m, TRUE);
6372
6373 if (m_object->internal == TRUE)
6374 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6375
6376 vm_pageout_cluster(m);
6377
6378 hibernate_stats.hibernate_found_dirty++;
6379
6380 goto next_pg;
6381
6382 reenter_pg_on_q:
6383 vm_page_queue_remove(q, m, vm_page_t, pageq);
6384 vm_page_queue_enter(q, m, vm_page_t, pageq);
6385
6386 hibernate_stats.hibernate_reentered_on_q++;
6387 next_pg:
6388 hibernate_stats.hibernate_considered++;
6389
6390 qcount--;
6391 try_failed_count = 0;
6392 }
6393 if (l_object != NULL) {
6394 vm_object_unlock(l_object);
6395 l_object = NULL;
6396 }
6397
6398 vm_page_unlock_queues();
6399
6400 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6401
6402 return (retval);
6403 }
6404
6405
6406 static int
6407 hibernate_flush_dirty_pages(int pass)
6408 {
6409 struct vm_speculative_age_q *aq;
6410 uint32_t i;
6411
6412 if (vm_page_local_q) {
6413 for (i = 0; i < vm_page_local_q_count; i++)
6414 vm_page_reactivate_local(i, TRUE, FALSE);
6415 }
6416
6417 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6418 int qcount;
6419 vm_page_t m;
6420
6421 aq = &vm_page_queue_speculative[i];
6422
6423 if (vm_page_queue_empty(&aq->age_q))
6424 continue;
6425 qcount = 0;
6426
6427 vm_page_lockspin_queues();
6428
6429 vm_page_queue_iterate(&aq->age_q,
6430 m,
6431 vm_page_t,
6432 pageq)
6433 {
6434 qcount++;
6435 }
6436 vm_page_unlock_queues();
6437
6438 if (qcount) {
6439 if (hibernate_flush_queue(&aq->age_q, qcount))
6440 return (1);
6441 }
6442 }
6443 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6444 return (1);
6445 /* XXX FBDP TODO: flush secluded queue */
6446 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6447 return (1);
6448 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6449 return (1);
6450 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6451 return (1);
6452
6453 if (pass == 1)
6454 vm_compressor_record_warmup_start();
6455
6456 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6457 if (pass == 1)
6458 vm_compressor_record_warmup_end();
6459 return (1);
6460 }
6461 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6462 if (pass == 1)
6463 vm_compressor_record_warmup_end();
6464 return (1);
6465 }
6466 if (pass == 1)
6467 vm_compressor_record_warmup_end();
6468
6469 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6470 return (1);
6471
6472 return (0);
6473 }
6474
6475
6476 void
6477 hibernate_reset_stats()
6478 {
6479 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6480 }
6481
6482
6483 int
6484 hibernate_flush_memory()
6485 {
6486 int retval;
6487
6488 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6489
6490 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6491
6492 hibernate_cleaning_in_progress = TRUE;
6493 hibernate_skip_external = FALSE;
6494
6495 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6496
6497 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6498
6499 vm_compressor_flush();
6500
6501 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6502
6503 if (consider_buffer_cache_collect != NULL) {
6504 unsigned int orig_wire_count;
6505
6506 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6507 orig_wire_count = vm_page_wire_count;
6508
6509 (void)(*consider_buffer_cache_collect)(1);
6510 consider_zone_gc(FALSE);
6511
6512 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6513
6514 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6515 }
6516 }
6517 hibernate_cleaning_in_progress = FALSE;
6518
6519 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6520
6521 if (retval)
6522 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6523
6524
6525 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6526 hibernate_stats.hibernate_considered,
6527 hibernate_stats.hibernate_reentered_on_q,
6528 hibernate_stats.hibernate_found_dirty);
6529 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6530 hibernate_stats.hibernate_skipped_cleaning,
6531 hibernate_stats.hibernate_skipped_transient,
6532 hibernate_stats.hibernate_skipped_precious,
6533 hibernate_stats.hibernate_skipped_external,
6534 hibernate_stats.hibernate_queue_nolock);
6535 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6536 hibernate_stats.hibernate_queue_paused,
6537 hibernate_stats.hibernate_throttled,
6538 hibernate_stats.hibernate_throttle_timeout,
6539 hibernate_stats.hibernate_drained,
6540 hibernate_stats.hibernate_drain_timeout);
6541
6542 return (retval);
6543 }
6544
6545
6546 static void
6547 hibernate_page_list_zero(hibernate_page_list_t *list)
6548 {
6549 uint32_t bank;
6550 hibernate_bitmap_t * bitmap;
6551
6552 bitmap = &list->bank_bitmap[0];
6553 for (bank = 0; bank < list->bank_count; bank++)
6554 {
6555 uint32_t last_bit;
6556
6557 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6558 // set out-of-bound bits at end of bitmap.
6559 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6560 if (last_bit)
6561 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
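		/*
		 * e.g. a bank covering 40 pages has last_bit == 8, so the mask
		 * 0xFFFFFFFF >> 8 == 0x00FFFFFF: the 24 low-order bits of the
		 * final word, which correspond to no page in this bank, are set
		 * so those positions are never treated as "needs saving".
		 */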
6562
6563 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6564 }
6565 }
6566
6567 void
6568 hibernate_free_gobble_pages(void)
6569 {
6570 vm_page_t m, next;
6571 uint32_t count = 0;
6572
6573 m = (vm_page_t) hibernate_gobble_queue;
6574 while(m)
6575 {
6576 next = m->snext;
6577 vm_page_free(m);
6578 count++;
6579 m = next;
6580 }
6581 hibernate_gobble_queue = VM_PAGE_NULL;
6582
6583 if (count)
6584 HIBLOG("Freed %d pages\n", count);
6585 }
6586
6587 static boolean_t
6588 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6589 {
6590 vm_object_t object = NULL;
6591 int refmod_state;
6592 boolean_t discard = FALSE;
6593
6594 do
6595 {
6596 if (m->private)
6597 panic("hibernate_consider_discard: private");
6598
6599 object = VM_PAGE_OBJECT(m);
6600
6601 if (!vm_object_lock_try(object)) {
6602 object = NULL;
6603 if (!preflight) hibernate_stats.cd_lock_failed++;
6604 break;
6605 }
6606 if (VM_PAGE_WIRED(m)) {
6607 if (!preflight) hibernate_stats.cd_found_wired++;
6608 break;
6609 }
6610 if (m->precious) {
6611 if (!preflight) hibernate_stats.cd_found_precious++;
6612 break;
6613 }
6614 if (m->busy || !object->alive) {
6615 /*
6616 * Somebody is playing with this page.
6617 */
6618 if (!preflight) hibernate_stats.cd_found_busy++;
6619 break;
6620 }
6621 if (m->absent || m->unusual || m->error) {
6622 /*
6623 * If it's unusual in any way, ignore it
6624 */
6625 if (!preflight) hibernate_stats.cd_found_unusual++;
6626 break;
6627 }
6628 if (m->cleaning) {
6629 if (!preflight) hibernate_stats.cd_found_cleaning++;
6630 break;
6631 }
6632 if (m->laundry) {
6633 if (!preflight) hibernate_stats.cd_found_laundry++;
6634 break;
6635 }
6636 if (!m->dirty)
6637 {
6638 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6639
6640 if (refmod_state & VM_MEM_REFERENCED)
6641 m->reference = TRUE;
6642 if (refmod_state & VM_MEM_MODIFIED) {
6643 SET_PAGE_DIRTY(m, FALSE);
6644 }
6645 }
6646
6647 /*
6648 * If it's clean or purgeable we can discard the page on wakeup.
6649 */
6650 discard = (!m->dirty)
6651 || (VM_PURGABLE_VOLATILE == object->purgable)
6652 || (VM_PURGABLE_EMPTY == object->purgable);
6653
6654
6655 if (discard == FALSE) {
6656 if (!preflight)
6657 hibernate_stats.cd_found_dirty++;
6658 } else if (m->xpmapped && m->reference && !object->internal) {
6659 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6660 if (!preflight)
6661 hibernate_stats.cd_found_xpmapped++;
6662 discard = FALSE;
6663 } else {
6664 if (!preflight)
6665 hibernate_stats.cd_skipped_xpmapped++;
6666 }
6667 }
6668 }
6669 while (FALSE);
6670
6671 if (object)
6672 vm_object_unlock(object);
6673
6674 return (discard);
6675 }
6676
6677
6678 static void
6679 hibernate_discard_page(vm_page_t m)
6680 {
6681 vm_object_t m_object;
6682
6683 if (m->absent || m->unusual || m->error)
6684 /*
6685 * If it's unusual in any way, ignore it
6686 */
6687 return;
6688
6689 m_object = VM_PAGE_OBJECT(m);
6690
6691 #if MACH_ASSERT || DEBUG
6692 if (!vm_object_lock_try(m_object))
6693 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6694 #else
6695 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6696 makes sure these locks are uncontended before sleep */
6697 #endif /* MACH_ASSERT || DEBUG */
6698
6699 if (m->pmapped == TRUE)
6700 {
6701 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6702 }
6703
6704 if (m->laundry)
6705 panic("hibernate_discard_page(%p) laundry", m);
6706 if (m->private)
6707 panic("hibernate_discard_page(%p) private", m);
6708 if (m->fictitious)
6709 panic("hibernate_discard_page(%p) fictitious", m);
6710
6711 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6712 {
6713 /* object should be on a queue */
6714 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6715 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6716 assert(old_queue);
6717 if (m_object->purgeable_when_ripe) {
6718 vm_purgeable_token_delete_first(old_queue);
6719 }
6720 vm_object_lock_assert_exclusive(m_object);
6721 m_object->purgable = VM_PURGABLE_EMPTY;
6722
6723 /*
6724 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6725 * accounted in the "volatile" ledger, so no change here.
6726 * We have to update vm_page_purgeable_count, though, since we're
6727 * effectively purging this object.
6728 */
6729 unsigned int delta;
6730 assert(m_object->resident_page_count >= m_object->wired_page_count);
6731 delta = (m_object->resident_page_count - m_object->wired_page_count);
6732 assert(vm_page_purgeable_count >= delta);
6733 assert(delta > 0);
6734 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6735 }
6736
6737 vm_page_free(m);
6738
6739 #if MACH_ASSERT || DEBUG
6740 vm_object_unlock(m_object);
6741 #endif /* MACH_ASSERT || DEBUG */
6742 }
6743
6744 /*
6745 Grab locks for hibernate_page_list_setall()
6746 */
6747 void
6748 hibernate_vm_lock_queues(void)
6749 {
6750 vm_object_lock(compressor_object);
6751 vm_page_lock_queues();
6752 lck_mtx_lock(&vm_page_queue_free_lock);
6753 lck_mtx_lock(&vm_purgeable_queue_lock);
6754
6755 if (vm_page_local_q) {
6756 uint32_t i;
6757 for (i = 0; i < vm_page_local_q_count; i++) {
6758 struct vpl *lq;
6759 lq = &vm_page_local_q[i].vpl_un.vpl;
6760 VPL_LOCK(&lq->vpl_lock);
6761 }
6762 }
6763 }
6764
6765 void
6766 hibernate_vm_unlock_queues(void)
6767 {
6768 if (vm_page_local_q) {
6769 uint32_t i;
6770 for (i = 0; i < vm_page_local_q_count; i++) {
6771 struct vpl *lq;
6772 lq = &vm_page_local_q[i].vpl_un.vpl;
6773 VPL_UNLOCK(&lq->vpl_lock);
6774 }
6775 }
6776 lck_mtx_unlock(&vm_purgeable_queue_lock);
6777 lck_mtx_unlock(&vm_page_queue_free_lock);
6778 vm_page_unlock_queues();
6779 vm_object_unlock(compressor_object);
6780 }
6781
6782 /*
6783 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
6784 pages known to the VM to not need saving are subtracted.
6785 Wired pages to be saved are tracked in page_list_wired, pageable pages in page_list.
6786 */
6787
6788 void
6789 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6790 hibernate_page_list_t * page_list_wired,
6791 hibernate_page_list_t * page_list_pal,
6792 boolean_t preflight,
6793 boolean_t will_discard,
6794 uint32_t * pagesOut)
6795 {
6796 uint64_t start, end, nsec;
6797 vm_page_t m;
6798 vm_page_t next;
6799 uint32_t pages = page_list->page_count;
6800 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6801 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6802 uint32_t count_wire = pages;
6803 uint32_t count_discard_active = 0;
6804 uint32_t count_discard_inactive = 0;
6805 uint32_t count_discard_cleaned = 0;
6806 uint32_t count_discard_purgeable = 0;
6807 uint32_t count_discard_speculative = 0;
6808 uint32_t count_discard_vm_struct_pages = 0;
6809 uint32_t i;
6810 uint32_t bank;
6811 hibernate_bitmap_t * bitmap;
6812 hibernate_bitmap_t * bitmap_wired;
6813 boolean_t discard_all;
6814 boolean_t discard;
6815
6816 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6817
6818 if (preflight) {
6819 page_list = NULL;
6820 page_list_wired = NULL;
6821 page_list_pal = NULL;
6822 discard_all = FALSE;
6823 } else {
6824 discard_all = will_discard;
6825 }
6826
6827 #if MACH_ASSERT || DEBUG
6828 if (!preflight)
6829 {
6830 assert(hibernate_vm_locks_are_safe());
6831 vm_page_lock_queues();
6832 if (vm_page_local_q) {
6833 for (i = 0; i < vm_page_local_q_count; i++) {
6834 struct vpl *lq;
6835 lq = &vm_page_local_q[i].vpl_un.vpl;
6836 VPL_LOCK(&lq->vpl_lock);
6837 }
6838 }
6839 }
6840 #endif /* MACH_ASSERT || DEBUG */
6841
6842
6843 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
6844
6845 clock_get_uptime(&start);
6846
6847 if (!preflight) {
6848 hibernate_page_list_zero(page_list);
6849 hibernate_page_list_zero(page_list_wired);
6850 hibernate_page_list_zero(page_list_pal);
6851
6852 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6853 hibernate_stats.cd_pages = pages;
6854 }
6855
6856 if (vm_page_local_q) {
6857 for (i = 0; i < vm_page_local_q_count; i++)
6858 vm_page_reactivate_local(i, TRUE, !preflight);
6859 }
6860
6861 if (preflight) {
6862 vm_object_lock(compressor_object);
6863 vm_page_lock_queues();
6864 lck_mtx_lock(&vm_page_queue_free_lock);
6865 }
6866
6867 m = (vm_page_t) hibernate_gobble_queue;
6868 while (m)
6869 {
6870 pages--;
6871 count_wire--;
6872 if (!preflight) {
6873 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6874 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6875 }
6876 m = m->snext;
6877 }
6878
6879 if (!preflight) for( i = 0; i < real_ncpus; i++ )
6880 {
6881 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6882 {
6883 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
6884 {
6885 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6886
6887 pages--;
6888 count_wire--;
6889 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6890 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6891
6892 hibernate_stats.cd_local_free++;
6893 hibernate_stats.cd_total_free++;
6894 }
6895 }
6896 }
6897
6898 for( i = 0; i < vm_colors; i++ )
6899 {
6900 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6901 m,
6902 vm_page_t,
6903 pageq)
6904 {
6905 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6906
6907 pages--;
6908 count_wire--;
6909 if (!preflight) {
6910 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6911 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6912
6913 hibernate_stats.cd_total_free++;
6914 }
6915 }
6916 }
6917
6918 vm_page_queue_iterate(&vm_lopage_queue_free,
6919 m,
6920 vm_page_t,
6921 pageq)
6922 {
6923 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6924
6925 pages--;
6926 count_wire--;
6927 if (!preflight) {
6928 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6929 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6930
6931 hibernate_stats.cd_total_free++;
6932 }
6933 }
6934
6935 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6936 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
6937 {
6938 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6939
6940 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6941 discard = FALSE;
6942 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6943 && hibernate_consider_discard(m, preflight))
6944 {
6945 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6946 count_discard_inactive++;
6947 discard = discard_all;
6948 }
6949 else
6950 count_throttled++;
6951 count_wire--;
6952 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6953
6954 if (discard) hibernate_discard_page(m);
6955 m = next;
6956 }
6957
6958 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6959 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6960 {
6961 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6962
6963 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6964 discard = FALSE;
6965 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6966 && hibernate_consider_discard(m, preflight))
6967 {
6968 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6969 if (m->dirty)
6970 count_discard_purgeable++;
6971 else
6972 count_discard_inactive++;
6973 discard = discard_all;
6974 }
6975 else
6976 count_anonymous++;
6977 count_wire--;
6978 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6979 if (discard) hibernate_discard_page(m);
6980 m = next;
6981 }
6982
6983 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6984 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6985 {
6986 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6987
6988 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6989 discard = FALSE;
6990 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6991 && hibernate_consider_discard(m, preflight))
6992 {
6993 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6994 if (m->dirty)
6995 count_discard_purgeable++;
6996 else
6997 count_discard_cleaned++;
6998 discard = discard_all;
6999 }
7000 else
7001 count_cleaned++;
7002 count_wire--;
7003 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7004 if (discard) hibernate_discard_page(m);
7005 m = next;
7006 }
7007
7008 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7009 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7010 {
7011 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7012
7013 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7014 discard = FALSE;
7015 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7016 && hibernate_consider_discard(m, preflight))
7017 {
7018 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7019 if (m->dirty)
7020 count_discard_purgeable++;
7021 else
7022 count_discard_active++;
7023 discard = discard_all;
7024 }
7025 else
7026 count_active++;
7027 count_wire--;
7028 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7029 if (discard) hibernate_discard_page(m);
7030 m = next;
7031 }
7032
7033 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7034 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7035 {
7036 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7037
7038 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7039 discard = FALSE;
7040 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7041 && hibernate_consider_discard(m, preflight))
7042 {
7043 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7044 if (m->dirty)
7045 count_discard_purgeable++;
7046 else
7047 count_discard_inactive++;
7048 discard = discard_all;
7049 }
7050 else
7051 count_inactive++;
7052 count_wire--;
7053 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7054 if (discard) hibernate_discard_page(m);
7055 m = next;
7056 }
7057 /* XXX FBDP TODO: secluded queue */
7058
7059 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7060 {
7061 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7062 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7063 {
7064 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7065
7066 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7067 discard = FALSE;
7068 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7069 && hibernate_consider_discard(m, preflight))
7070 {
7071 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7072 count_discard_speculative++;
7073 discard = discard_all;
7074 }
7075 else
7076 count_speculative++;
7077 count_wire--;
7078 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7079 if (discard) hibernate_discard_page(m);
7080 m = next;
7081 }
7082 }
7083
7084 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
7085 {
7086 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7087
7088 count_compressor++;
7089 count_wire--;
7090 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7091 }
7092
7093 if (preflight == FALSE && discard_all == TRUE) {
7094 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7095
7096 HIBLOG("hibernate_teardown started\n");
7097 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7098 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7099
7100 pages -= count_discard_vm_struct_pages;
7101 count_wire -= count_discard_vm_struct_pages;
7102
7103 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7104
7105 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7106 }
7107
7108 if (!preflight) {
7109 // pull wired from hibernate_bitmap
7110 bitmap = &page_list->bank_bitmap[0];
7111 bitmap_wired = &page_list_wired->bank_bitmap[0];
7112 for (bank = 0; bank < page_list->bank_count; bank++)
7113 {
7114 for (i = 0; i < bitmap->bitmapwords; i++)
7115 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7116 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7117 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7118 }
7119 }
7120
7121 // machine dependent adjustments
7122 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7123
7124 if (!preflight) {
7125 hibernate_stats.cd_count_wire = count_wire;
7126 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7127 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7128 }
7129
7130 clock_get_uptime(&end);
7131 absolutetime_to_nanoseconds(end - start, &nsec);
7132 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7133
7134 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7135 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7136 discard_all ? "did" : "could",
7137 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7138
7139 if (hibernate_stats.cd_skipped_xpmapped)
7140 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7141
7142 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7143
7144 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7145
7146 #if MACH_ASSERT || DEBUG
7147 if (!preflight)
7148 {
7149 if (vm_page_local_q) {
7150 for (i = 0; i < vm_page_local_q_count; i++) {
7151 struct vpl *lq;
7152 lq = &vm_page_local_q[i].vpl_un.vpl;
7153 VPL_UNLOCK(&lq->vpl_lock);
7154 }
7155 }
7156 vm_page_unlock_queues();
7157 }
7158 #endif /* MACH_ASSERT || DEBUG */
7159
7160 if (preflight) {
7161 lck_mtx_unlock(&vm_page_queue_free_lock);
7162 vm_page_unlock_queues();
7163 vm_object_unlock(compressor_object);
7164 }
7165
7166 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7167 }
7168
7169 void
7170 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7171 {
7172 uint64_t start, end, nsec;
7173 vm_page_t m;
7174 vm_page_t next;
7175 uint32_t i;
7176 uint32_t count_discard_active = 0;
7177 uint32_t count_discard_inactive = 0;
7178 uint32_t count_discard_purgeable = 0;
7179 uint32_t count_discard_cleaned = 0;
7180 uint32_t count_discard_speculative = 0;
7181
7182
7183 #if MACH_ASSERT || DEBUG
7184 vm_page_lock_queues();
7185 if (vm_page_local_q) {
7186 for (i = 0; i < vm_page_local_q_count; i++) {
7187 struct vpl *lq;
7188 lq = &vm_page_local_q[i].vpl_un.vpl;
7189 VPL_LOCK(&lq->vpl_lock);
7190 }
7191 }
7192 #endif /* MACH_ASSERT || DEBUG */
7193
7194 clock_get_uptime(&start);
7195
7196 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7197 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7198 {
7199 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7200
7201 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7202 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7203 {
7204 if (m->dirty)
7205 count_discard_purgeable++;
7206 else
7207 count_discard_inactive++;
7208 hibernate_discard_page(m);
7209 }
7210 m = next;
7211 }
7212
7213 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7214 {
7215 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7216 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7217 {
7218 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7219
7220 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7221 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7222 {
7223 count_discard_speculative++;
7224 hibernate_discard_page(m);
7225 }
7226 m = next;
7227 }
7228 }
7229
7230 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7231 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7232 {
7233 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7234
7235 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7236 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7237 {
7238 if (m->dirty)
7239 count_discard_purgeable++;
7240 else
7241 count_discard_inactive++;
7242 hibernate_discard_page(m);
7243 }
7244 m = next;
7245 }
7246 /* XXX FBDP TODO: secluded queue */
7247
7248 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7249 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7250 {
7251 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7252
7253 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7254 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7255 {
7256 if (m->dirty)
7257 count_discard_purgeable++;
7258 else
7259 count_discard_active++;
7260 hibernate_discard_page(m);
7261 }
7262 m = next;
7263 }
7264
7265 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7266 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7267 {
7268 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7269
7270 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7271 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7272 {
7273 if (m->dirty)
7274 count_discard_purgeable++;
7275 else
7276 count_discard_cleaned++;
7277 hibernate_discard_page(m);
7278 }
7279 m = next;
7280 }
7281
7282 #if MACH_ASSERT || DEBUG
7283 if (vm_page_local_q) {
7284 for (i = 0; i < vm_page_local_q_count; i++) {
7285 struct vpl *lq;
7286 lq = &vm_page_local_q[i].vpl_un.vpl;
7287 VPL_UNLOCK(&lq->vpl_lock);
7288 }
7289 }
7290 vm_page_unlock_queues();
7291 #endif /* MACH_ASSERT || DEBUG */
7292
7293 clock_get_uptime(&end);
7294 absolutetime_to_nanoseconds(end - start, &nsec);
7295 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7296 nsec / 1000000ULL,
7297 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7298 }
7299
7300 boolean_t hibernate_paddr_map_inited = FALSE;
7301 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7302 vm_page_t hibernate_rebuild_hash_list = NULL;
7303
7304 unsigned int hibernate_teardown_found_tabled_pages = 0;
7305 unsigned int hibernate_teardown_found_created_pages = 0;
7306 unsigned int hibernate_teardown_found_free_pages = 0;
7307 unsigned int hibernate_teardown_vm_page_free_count;
7308
7309
7310 struct ppnum_mapping {
7311 struct ppnum_mapping *ppnm_next;
7312 ppnum_t ppnm_base_paddr;
7313 unsigned int ppnm_sindx;
7314 unsigned int ppnm_eindx;
7315 };
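/*
 * Each ppnum_mapping records one run of vm_pages[] entries whose physical
 * page numbers are consecutive.  For example, if vm_pages[100..149] describe
 * physical pages 0x8000..0x8031, the run is recorded as { ppnm_base_paddr =
 * 0x8000, ppnm_sindx = 100, ppnm_eindx = 150 } (eindx is exclusive), and
 * hibernate_lookup_paddr(120) returns 0x8000 + (120 - 100) == 0x8014.
 */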
7316
7317 struct ppnum_mapping *ppnm_head;
7318 struct ppnum_mapping *ppnm_last_found = NULL;
7319
7320
7321 void
7322 hibernate_create_paddr_map()
7323 {
7324 unsigned int i;
7325 ppnum_t next_ppnum_in_run = 0;
7326 struct ppnum_mapping *ppnm = NULL;
7327
7328 if (hibernate_paddr_map_inited == FALSE) {
7329
7330 for (i = 0; i < vm_pages_count; i++) {
7331
7332 if (ppnm)
7333 ppnm->ppnm_eindx = i;
7334
7335 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7336
7337 ppnm = kalloc(sizeof(struct ppnum_mapping));
7338
7339 ppnm->ppnm_next = ppnm_head;
7340 ppnm_head = ppnm;
7341
7342 ppnm->ppnm_sindx = i;
7343 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7344 }
7345 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7346 }
7347 ppnm->ppnm_eindx++;
7348
7349 hibernate_paddr_map_inited = TRUE;
7350 }
7351 }
7352
7353 ppnum_t
7354 hibernate_lookup_paddr(unsigned int indx)
7355 {
7356 struct ppnum_mapping *ppnm = NULL;
7357
7358 ppnm = ppnm_last_found;
7359
7360 if (ppnm) {
7361 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7362 goto done;
7363 }
7364 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7365
7366 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7367 ppnm_last_found = ppnm;
7368 break;
7369 }
7370 }
7371 if (ppnm == NULL)
7372 panic("hibernate_lookup_paddr of %d failed\n", indx);
7373 done:
7374 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7375 }
7376
7377
7378 uint32_t
7379 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7380 {
7381 addr64_t saddr_aligned;
7382 addr64_t eaddr_aligned;
7383 addr64_t addr;
7384 ppnum_t paddr;
7385 unsigned int mark_as_unneeded_pages = 0;
7386
7387 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7388 eaddr_aligned = eaddr & ~PAGE_MASK_64;
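	/*
	 * e.g. with 4K pages, saddr 0x1800 rounds up to 0x2000 and eaddr
	 * 0x5400 rounds down to 0x5000, so only the fully covered pages at
	 * 0x2000, 0x3000 and 0x4000 (3 pages) are marked as unneeded.
	 */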
7389
7390 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7391
7392 paddr = pmap_find_phys(kernel_pmap, addr);
7393
7394 assert(paddr);
7395
7396 hibernate_page_bitset(page_list, TRUE, paddr);
7397 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7398
7399 mark_as_unneeded_pages++;
7400 }
7401 return (mark_as_unneeded_pages);
7402 }
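
/*
 * hibernate_mark_as_unneeded() only marks pages that lie entirely
 * inside [saddr, eaddr): the start is rounded up to a page boundary
 * and the end rounded down.  A minimal sketch of that rounding,
 * assuming a hypothetical 4K page size; illustration only, not part
 * of the build.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL
#define SKETCH_PAGE_MASK (SKETCH_PAGE_SIZE - 1)

/* count the whole pages strictly contained in [saddr, eaddr) */
static uint64_t
whole_pages_within(uint64_t saddr, uint64_t eaddr)
{
	uint64_t saddr_aligned = (saddr + SKETCH_PAGE_MASK) & ~SKETCH_PAGE_MASK;	/* round up */
	uint64_t eaddr_aligned = eaddr & ~SKETCH_PAGE_MASK;				/* round down */

	if (eaddr_aligned <= saddr_aligned)
		return 0;
	return (eaddr_aligned - saddr_aligned) / SKETCH_PAGE_SIZE;
}

static void
whole_pages_within_check(void)
{
	assert(whole_pages_within(0x1000, 0x3000) == 2);	/* two full pages */
	assert(whole_pages_within(0x1001, 0x3000) == 1);	/* partial first page dropped */
	assert(whole_pages_within(0x1001, 0x1fff) == 0);	/* no whole page in range */
}
#endif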
7403
7404
7405 void
7406 hibernate_hash_insert_page(vm_page_t mem)
7407 {
7408 vm_page_bucket_t *bucket;
7409 int hash_id;
7410 vm_object_t m_object;
7411
7412 m_object = VM_PAGE_OBJECT(mem);
7413
7414 assert(mem->hashed);
7415 assert(m_object);
7416 assert(mem->offset != (vm_object_offset_t) -1);
7417
7418 /*
7419 * Insert it into the vm_object/offset hash table
7420 */
7421 hash_id = vm_page_hash(m_object, mem->offset);
7422 bucket = &vm_page_buckets[hash_id];
7423
7424 mem->next_m = bucket->page_list;
7425 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7426 }
7427
7428
7429 void
7430 hibernate_free_range(int sindx, int eindx)
7431 {
7432 vm_page_t mem;
7433 unsigned int color;
7434
7435 while (sindx < eindx) {
7436 mem = &vm_pages[sindx];
7437
7438 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7439
7440 mem->lopage = FALSE;
7441 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7442
7443 color = VM_PAGE_GET_COLOR(mem);
7444 #if defined(__x86_64__)
7445 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7446 mem,
7447 vm_page_t,
7448 pageq);
7449 #else
7450 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7451 mem,
7452 vm_page_t,
7453 pageq);
7454 #endif
7455 vm_page_free_count++;
7456
7457 sindx++;
7458 }
7459 }
7460
7461
7462 extern void hibernate_rebuild_pmap_structs(void);
7463
7464 void
7465 hibernate_rebuild_vm_structs(void)
7466 {
7467 int i, cindx, sindx, eindx;
7468 vm_page_t mem, tmem, mem_next;
7469 AbsoluteTime startTime, endTime;
7470 uint64_t nsec;
7471
7472 if (hibernate_rebuild_needed == FALSE)
7473 return;
7474
7475 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7476 HIBLOG("hibernate_rebuild started\n");
7477
7478 clock_get_uptime(&startTime);
7479
7480 hibernate_rebuild_pmap_structs();
7481
7482 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7483 eindx = vm_pages_count;
7484
7485 /*
7486 * Mark all the vm_pages[] that have not been initialized yet as being
7487 * transient. This is needed to ensure that the buddy page search is correct.
7488 * Without this, random data in these vm_pages[] entries can trip up the buddy search
7489 */
7490 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7491 vm_pages[i].vm_page_q_state = VM_PAGE_NOT_ON_Q;
7492
7493 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7494
7495 mem = &vm_pages[cindx];
7496 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
7497 /*
7498 * hibernate_teardown_vm_structs leaves the location where
7499 * this vm_page_t must be located in "next_m".
7500 */
7501 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7502 mem->next_m = VM_PAGE_PACK_PTR(NULL);
7503
7504 sindx = (int)(tmem - &vm_pages[0]);
7505
7506 if (mem != tmem) {
7507 /*
7508 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7509 * so move it back to its real location
7510 */
7511 *tmem = *mem;
7512 mem = tmem;
7513 }
7514 if (mem->hashed)
7515 hibernate_hash_insert_page(mem);
7516 /*
7517 * the 'hole' between this vm_page_t and the previous
7518 * vm_page_t we moved needs to be initialized as
7519 * a range of free vm_page_t's
7520 */
7521 hibernate_free_range(sindx + 1, eindx);
7522
7523 eindx = sindx;
7524 }
7525 if (sindx)
7526 hibernate_free_range(0, sindx);
7527
7528 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7529
7530 /*
7531 * process the list of vm_page_t's that were entered in the hash,
7532 * but were not located in the vm_pages array... these are
7533 * vm_page_t's that were created on the fly (i.e. fictitious)
7534 */
7535 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7536 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7537
7538 mem->next_m = 0;
7539 hibernate_hash_insert_page(mem);
7540 }
7541 hibernate_rebuild_hash_list = NULL;
7542
7543 clock_get_uptime(&endTime);
7544 SUB_ABSOLUTETIME(&endTime, &startTime);
7545 absolutetime_to_nanoseconds(endTime, &nsec);
7546
7547 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7548
7549 hibernate_rebuild_needed = FALSE;
7550
7551 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7552 }
7553
7554
7555 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7556
7557 uint32_t
7558 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7559 {
7560 unsigned int i;
7561 unsigned int compact_target_indx;
7562 vm_page_t mem, mem_next;
7563 vm_page_bucket_t *bucket;
7564 unsigned int mark_as_unneeded_pages = 0;
7565 unsigned int unneeded_vm_page_bucket_pages = 0;
7566 unsigned int unneeded_vm_pages_pages = 0;
7567 unsigned int unneeded_pmap_pages = 0;
7568 addr64_t start_of_unneeded = 0;
7569 addr64_t end_of_unneeded = 0;
7570
7571
7572 if (hibernate_should_abort())
7573 return (0);
7574
7575 hibernate_rebuild_needed = TRUE;
7576
7577 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7578 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7579 vm_page_cleaned_count, compressor_object->resident_page_count);
7580
7581 for (i = 0; i < vm_page_bucket_count; i++) {
7582
7583 bucket = &vm_page_buckets[i];
7584
7585 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7586 assert(mem->hashed);
7587
7588 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7589
7590 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7591 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7592 hibernate_rebuild_hash_list = mem;
7593 }
7594 }
7595 }
7596 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7597 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7598
7599 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7600
7601 compact_target_indx = 0;
7602
7603 for (i = 0; i < vm_pages_count; i++) {
7604
7605 mem = &vm_pages[i];
7606
7607 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7608 unsigned int color;
7609
7610 assert(mem->busy);
7611 assert(!mem->lopage);
7612
7613 color = VM_PAGE_GET_COLOR(mem);
7614
7615 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7616 mem,
7617 vm_page_t,
7618 pageq);
7619
7620 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7621
7622 vm_page_free_count--;
7623
7624 hibernate_teardown_found_free_pages++;
7625
7626 if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
7627 compact_target_indx = i;
7628 } else {
7629 /*
7630 * record this vm_page_t's original location
7631 * we need this even if it doesn't get moved
7632 * as an indicator to the rebuild function that
7633 * we don't have to move it
7634 */
7635 mem->next_m = VM_PAGE_PACK_PTR(mem);
7636
7637 if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7638 /*
7639 * we've got a hole to fill, so
7640 * move this vm_page_t to its new home
7641 */
7642 vm_pages[compact_target_indx] = *mem;
7643 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7644
7645 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7646 compact_target_indx++;
7647 } else
7648 hibernate_teardown_last_valid_compact_indx = i;
7649 }
7650 }
7651 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7652 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7653 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7654
7655 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7656
7657 if (start_of_unneeded) {
7658 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7659 mark_as_unneeded_pages += unneeded_pmap_pages;
7660 }
7661 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7662
7663 return (mark_as_unneeded_pages);
7664 }
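
/*
 * How teardown and rebuild fit together, as a minimal sketch: teardown
 * compacts the live entries toward the front of the array, remembering
 * each entry's original slot (the kernel packs that into next_m), so
 * the tail of the array can be handed off as unneeded; rebuild walks
 * the compacted prefix backwards and copies every entry home again.
 * The int array, home[] side table and names are hypothetical
 * simplifications; illustration only, not part of the build.
 */
#if 0
#include <stddef.h>

#define NPAGES 8

static int    pages[NPAGES] = { 11, 0, 22, 0, 0, 33, 44, 0 };	/* 0 == free slot */
static size_t home[NPAGES];		/* original index of the entry now stored here */
static size_t last_valid_compact;	/* last index of the compacted prefix */

static void
teardown_compact(void)
{
	size_t target = 0;

	for (size_t i = 0; i < NPAGES; i++) {
		if (pages[i] == 0)
			continue;			/* free: leave the hole for now */
		home[i] = i;				/* record the original location */
		if (pages[target] == 0) {		/* hole to fill at 'target' */
			pages[target] = pages[i];
			home[target] = i;
			pages[i] = 0;
			last_valid_compact = target++;
		} else {
			last_valid_compact = i;
			target = i + 1;
		}
	}
	/* everything past last_valid_compact is now reclaimable */
}

static void
rebuild_restore(void)
{
	for (size_t c = last_valid_compact + 1; c-- > 0; ) {
		size_t orig = home[c];

		if (orig != c) {
			pages[orig] = pages[c];		/* move it back to its real slot */
			pages[c] = 0;
		}
	}
}
#endif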
7665
7666
7667 #endif /* HIBERNATION */
7668
7669 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7670
7671 #include <mach_vm_debug.h>
7672 #if MACH_VM_DEBUG
7673
7674 #include <mach_debug/hash_info.h>
7675 #include <vm/vm_debug.h>
7676
7677 /*
7678 * Routine: vm_page_info
7679 * Purpose:
7680 * Return information about the global VP table.
7681 * Fills the buffer with as much information as possible
7682 * and returns the desired size of the buffer.
7683 * Conditions:
7684 * Nothing locked. The caller should provide
7685 * possibly-pageable memory.
7686 */
7687
7688 unsigned int
7689 vm_page_info(
7690 hash_info_bucket_t *info,
7691 unsigned int count)
7692 {
7693 unsigned int i;
7694 lck_spin_t *bucket_lock;
7695
7696 if (vm_page_bucket_count < count)
7697 count = vm_page_bucket_count;
7698
7699 for (i = 0; i < count; i++) {
7700 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7701 unsigned int bucket_count = 0;
7702 vm_page_t m;
7703
7704 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7705 lck_spin_lock(bucket_lock);
7706
7707 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7708 m != VM_PAGE_NULL;
7709 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
7710 bucket_count++;
7711
7712 lck_spin_unlock(bucket_lock);
7713
7714 /* don't touch pageable memory while holding locks */
7715 info[i].hib_count = bucket_count;
7716 }
7717
7718 return vm_page_bucket_count;
7719 }
7720 #endif /* MACH_VM_DEBUG */
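
/*
 * vm_page_info() above takes the bucket's striped spin lock (one lock
 * covers BUCKETS_PER_LOCK consecutive buckets), counts the chain, and
 * only writes into the possibly-pageable output buffer after dropping
 * the lock.  A minimal user-space sketch of that lock striping, using
 * pthread mutexes and hypothetical names; illustration only, not part
 * of the build.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

#define SKETCH_BUCKET_COUNT	64
#define SKETCH_BUCKETS_PER_LOCK	16	/* one lock per 16 consecutive buckets */

struct node { struct node *next; };

static struct node	*buckets[SKETCH_BUCKET_COUNT];
static pthread_mutex_t	 bucket_locks[SKETCH_BUCKET_COUNT / SKETCH_BUCKETS_PER_LOCK] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* count the chain in one bucket under its shared, striped lock */
static unsigned int
bucket_chain_length(unsigned int i)
{
	pthread_mutex_t *lock = &bucket_locks[i / SKETCH_BUCKETS_PER_LOCK];
	unsigned int count = 0;

	pthread_mutex_lock(lock);
	for (struct node *n = buckets[i]; n != NULL; n = n->next)
		count++;
	pthread_mutex_unlock(lock);

	return count;	/* caller copies this out with no locks held */
}
#endif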
7721
7722 #if VM_PAGE_BUCKETS_CHECK
7723 void
7724 vm_page_buckets_check(void)
7725 {
7726 unsigned int i;
7727 vm_page_t p;
7728 unsigned int p_hash;
7729 vm_page_bucket_t *bucket;
7730 lck_spin_t *bucket_lock;
7731
7732 if (!vm_page_buckets_check_ready) {
7733 return;
7734 }
7735
7736 #if HIBERNATION
7737 if (hibernate_rebuild_needed ||
7738 hibernate_rebuild_hash_list) {
7739 panic("BUCKET_CHECK: hibernation in progress: "
7740 "rebuild_needed=%d rebuild_hash_list=%p\n",
7741 hibernate_rebuild_needed,
7742 hibernate_rebuild_hash_list);
7743 }
7744 #endif /* HIBERNATION */
7745
7746 #if VM_PAGE_FAKE_BUCKETS
7747 char *cp;
7748 for (cp = (char *) vm_page_fake_buckets_start;
7749 cp < (char *) vm_page_fake_buckets_end;
7750 cp++) {
7751 if (*cp != 0x5a) {
7752 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7753 "[0x%llx:0x%llx]\n",
7754 cp,
7755 (uint64_t) vm_page_fake_buckets_start,
7756 (uint64_t) vm_page_fake_buckets_end);
7757 }
7758 }
7759 #endif /* VM_PAGE_FAKE_BUCKETS */
7760
7761 for (i = 0; i < vm_page_bucket_count; i++) {
7762 vm_object_t p_object;
7763
7764 bucket = &vm_page_buckets[i];
7765 if (!bucket->page_list) {
7766 continue;
7767 }
7768
7769 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7770 lck_spin_lock(bucket_lock);
7771 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7772
7773 while (p != VM_PAGE_NULL) {
7774 p_object = VM_PAGE_OBJECT(p);
7775 p_hash = vm_page_hash(p_object, p->offset);
7776 if (!p->hashed) {
7777 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7778 "hash %d in bucket %d at %p "
7779 "is not hashed\n",
7780 p, p_object, p->offset,
7781 p_hash, i, bucket);
7782 }
7783
7784 if (p_hash != i) {
7785 panic("BUCKET_CHECK: corruption in bucket %d "
7786 "at %p: page %p object %p offset 0x%llx "
7787 "hash %d\n",
7788 i, bucket, p, p_object, p->offset,
7789 p_hash);
7790 }
7791 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
7792 }
7793 lck_spin_unlock(bucket_lock);
7794 }
7795
7796 // printf("BUCKET_CHECK: checked buckets\n");
7797 }
7798 #endif /* VM_PAGE_BUCKETS_CHECK */
7799
7800 /*
7801 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7802 * local queues if they exist... it's the only spot in the system where we add pages
7803 * to those queues... once on those queues, those pages can only move to one of the
7804 * global page queues or the free queues... they NEVER move from local q to local q.
7805 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7806 * the global vm_page_queue_lock at this point... we still need to take the local lock
7807 * in case this operation is being run on a different CPU than the one that owns the local queue,
7808 * but we don't have to worry about the page moving to a global queue or becoming wired
7809 * while we're grabbing the local lock since those operations would require the global
7810 * vm_page_queue_lock to be held, and we already own it.
7811 *
7812 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7813 * 'wired' and local are ALWAYS mutually exclusive conditions.
7814 */
7815
7816 #if CONFIG_BACKGROUND_QUEUE
7817 void
7818 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
7819 #else
7820 void
7821 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
7822 #endif
7823 {
7824 boolean_t was_pageable = TRUE;
7825 vm_object_t m_object;
7826
7827 m_object = VM_PAGE_OBJECT(mem);
7828
7829 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7830
7831 if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
7832 {
7833 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7834 #if CONFIG_BACKGROUND_QUEUE
7835 if (remove_from_backgroundq == TRUE) {
7836 vm_page_remove_from_backgroundq(mem);
7837 }
7838 if (mem->vm_page_on_backgroundq) {
7839 assert(mem->vm_page_backgroundq.next != 0);
7840 assert(mem->vm_page_backgroundq.prev != 0);
7841 } else {
7842 assert(mem->vm_page_backgroundq.next == 0);
7843 assert(mem->vm_page_backgroundq.prev == 0);
7844 }
7845 #endif /* CONFIG_BACKGROUND_QUEUE */
7846 return;
7847 }
7848
7849 if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
7850 {
7851 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7852 #if CONFIG_BACKGROUND_QUEUE
7853 assert(mem->vm_page_backgroundq.next == 0 &&
7854 mem->vm_page_backgroundq.prev == 0 &&
7855 mem->vm_page_on_backgroundq == FALSE);
7856 #endif
7857 return;
7858 }
7859 if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
7860 /*
7861 * might put these guys on a list for debugging purposes
7862 * if we do, we'll need to remove this assert
7863 */
7864 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7865 #if CONFIG_BACKGROUND_QUEUE
7866 assert(mem->vm_page_backgroundq.next == 0 &&
7867 mem->vm_page_backgroundq.prev == 0 &&
7868 mem->vm_page_on_backgroundq == FALSE);
7869 #endif
7870 return;
7871 }
7872
7873 assert(m_object != compressor_object);
7874 assert(m_object != kernel_object);
7875 assert(m_object != vm_submap_object);
7876 assert(!mem->fictitious);
7877
7878 switch(mem->vm_page_q_state) {
7879
7880 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
7881 {
7882 struct vpl *lq;
7883
7884 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
7885 VPL_LOCK(&lq->vpl_lock);
7886 vm_page_queue_remove(&lq->vpl_queue,
7887 mem, vm_page_t, pageq);
7888 mem->local_id = 0;
7889 lq->vpl_count--;
7890 if (m_object->internal) {
7891 lq->vpl_internal_count--;
7892 } else {
7893 lq->vpl_external_count--;
7894 }
7895 VPL_UNLOCK(&lq->vpl_lock);
7896 was_pageable = FALSE;
7897 break;
7898 }
7899 case VM_PAGE_ON_ACTIVE_Q:
7900 {
7901 vm_page_queue_remove(&vm_page_queue_active,
7902 mem, vm_page_t, pageq);
7903 vm_page_active_count--;
7904 break;
7905 }
7906
7907 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
7908 {
7909 assert(m_object->internal == TRUE);
7910
7911 vm_page_inactive_count--;
7912 vm_page_queue_remove(&vm_page_queue_anonymous,
7913 mem, vm_page_t, pageq);
7914 vm_page_anonymous_count--;
7915 vm_purgeable_q_advance_all();
7916 break;
7917 }
7918
7919 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
7920 {
7921 assert(m_object->internal == FALSE);
7922
7923 vm_page_inactive_count--;
7924 vm_page_queue_remove(&vm_page_queue_inactive,
7925 mem, vm_page_t, pageq);
7926 vm_purgeable_q_advance_all();
7927 break;
7928 }
7929
7930 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
7931 {
7932 assert(m_object->internal == FALSE);
7933
7934 vm_page_inactive_count--;
7935 vm_page_queue_remove(&vm_page_queue_cleaned,
7936 mem, vm_page_t, pageq);
7937 vm_page_cleaned_count--;
7938 break;
7939 }
7940
7941 case VM_PAGE_ON_THROTTLED_Q:
7942 {
7943 assert(m_object->internal == TRUE);
7944
7945 vm_page_queue_remove(&vm_page_queue_throttled,
7946 mem, vm_page_t, pageq);
7947 vm_page_throttled_count--;
7948 was_pageable = FALSE;
7949 break;
7950 }
7951
7952 case VM_PAGE_ON_SPECULATIVE_Q:
7953 {
7954 assert(m_object->internal == FALSE);
7955
7956 vm_page_remque(&mem->pageq);
7957 vm_page_speculative_count--;
7958 break;
7959 }
7960
7961 #if CONFIG_SECLUDED_MEMORY
7962 case VM_PAGE_ON_SECLUDED_Q:
7963 {
7964 vm_page_queue_remove(&vm_page_queue_secluded,
7965 mem, vm_page_t, pageq);
7966 vm_page_secluded_count--;
7967 if (m_object == VM_OBJECT_NULL) {
7968 vm_page_secluded_count_free--;
7969 was_pageable = FALSE;
7970 } else {
7971 assert(!m_object->internal);
7972 vm_page_secluded_count_inuse--;
7973 was_pageable = FALSE;
7974 // was_pageable = TRUE;
7975 }
7976 break;
7977 }
7978 #endif /* CONFIG_SECLUDED_MEMORY */
7979
7980 default:
7981 {
7982 /*
7983 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7984 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7985 * the caller is responsible for determining if the page is on that queue, and if so, must
7986 * either first remove it (it needs both the page queues lock and the object lock to do
7987 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7988 *
7989 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7990 * or any of the undefined states
7991 */
7992 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
7993 break;
7994 }
7995
7996 }
7997 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7998 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7999
8000 #if CONFIG_BACKGROUND_QUEUE
8001 if (remove_from_backgroundq == TRUE)
8002 vm_page_remove_from_backgroundq(mem);
8003 #endif
8004 if (was_pageable) {
8005 if (m_object->internal) {
8006 vm_page_pageable_internal_count--;
8007 } else {
8008 vm_page_pageable_external_count--;
8009 }
8010 }
8011 }
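
/*
 * A minimal sketch of the lock ordering described in the comment above
 * vm_page_queues_remove(): the caller already holds the global queue
 * lock, and the per-CPU local lock is taken only around the local-queue
 * unlink, so the page cannot move to a global queue or become wired in
 * the meantime.  pthread mutexes and all names here are hypothetical
 * stand-ins; illustration only, not part of the build.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

struct local_q {
	pthread_mutex_t	lock;
	int		count;
};

static pthread_mutex_t	global_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct local_q	local_queues[2] = {
	{ PTHREAD_MUTEX_INITIALIZER, 0 },
	{ PTHREAD_MUTEX_INITIALIZER, 0 },
};

struct page {
	bool	on_local_q;
	int	local_id;	/* which local queue; valid only while on_local_q */
};

/* caller must already hold global_queue_lock */
static void
remove_from_local_queue(struct page *p)
{
	struct local_q *lq = &local_queues[p->local_id];

	/* lock order: global queue lock first (held by caller), then the local lock */
	pthread_mutex_lock(&lq->lock);
	lq->count--;
	p->on_local_q = false;
	p->local_id = 0;
	pthread_mutex_unlock(&lq->lock);
}

static void
queues_remove(struct page *p)
{
	pthread_mutex_lock(&global_queue_lock);
	if (p->on_local_q)
		remove_from_local_queue(p);
	pthread_mutex_unlock(&global_queue_lock);
}
#endif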
8012
8013 void
8014 vm_page_remove_internal(vm_page_t page)
8015 {
8016 vm_object_t __object = VM_PAGE_OBJECT(page);
8017 if (page == __object->memq_hint) {
8018 vm_page_t __new_hint;
8019 vm_page_queue_entry_t __qe;
8020 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
8021 if (vm_page_queue_end(&__object->memq, __qe)) {
8022 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
8023 if (vm_page_queue_end(&__object->memq, __qe)) {
8024 __qe = NULL;
8025 }
8026 }
8027 __new_hint = (vm_page_t)((uintptr_t) __qe);
8028 __object->memq_hint = __new_hint;
8029 }
8030 vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
8031 #if CONFIG_SECLUDED_MEMORY
8032 if (__object->eligible_for_secluded) {
8033 vm_page_secluded.eligible_for_secluded--;
8034 }
8035 #endif /* CONFIG_SECLUDED_MEMORY */
8036 }
8037
8038 void
8039 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8040 {
8041 vm_object_t m_object;
8042
8043 m_object = VM_PAGE_OBJECT(mem);
8044
8045 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8046 assert(!mem->fictitious);
8047 assert(!mem->laundry);
8048 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8049 vm_page_check_pageable_safe(mem);
8050
8051 if (m_object->internal) {
8052 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8053
8054 if (first == TRUE)
8055 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8056 else
8057 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8058
8059 vm_page_anonymous_count++;
8060 vm_page_pageable_internal_count++;
8061 } else {
8062 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8063
8064 if (first == TRUE)
8065 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8066 else
8067 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8068
8069 vm_page_pageable_external_count++;
8070 }
8071 vm_page_inactive_count++;
8072 token_new_pagecount++;
8073
8074 #if CONFIG_BACKGROUND_QUEUE
8075 if (mem->vm_page_in_background)
8076 vm_page_add_to_backgroundq(mem, FALSE);
8077 #endif
8078 }
8079
8080 void
8081 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8082 {
8083 vm_object_t m_object;
8084
8085 m_object = VM_PAGE_OBJECT(mem);
8086
8087 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8088 assert(!mem->fictitious);
8089 assert(!mem->laundry);
8090 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8091 vm_page_check_pageable_safe(mem);
8092
8093 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
8094 if (first == TRUE)
8095 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
8096 else
8097 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
8098 vm_page_active_count++;
8099
8100 if (m_object->internal) {
8101 vm_page_pageable_internal_count++;
8102 } else {
8103 vm_page_pageable_external_count++;
8104 }
8105
8106 #if CONFIG_BACKGROUND_QUEUE
8107 if (mem->vm_page_in_background)
8108 vm_page_add_to_backgroundq(mem, FALSE);
8109 #endif
8110 }
8111
8112 /*
8113 * Pages from special kernel objects shouldn't
8114 * be placed on pageable queues.
8115 */
8116 void
8117 vm_page_check_pageable_safe(vm_page_t page)
8118 {
8119 vm_object_t page_object;
8120
8121 page_object = VM_PAGE_OBJECT(page);
8122
8123 if (page_object == kernel_object) {
7124 panic("vm_page_check_pageable_safe: trying to add page " \
8125 "from kernel object (%p) to pageable queue", kernel_object);
8126 }
8127
8128 if (page_object == compressor_object) {
7129 panic("vm_page_check_pageable_safe: trying to add page " \
8130 "from compressor object (%p) to pageable queue", compressor_object);
8131 }
8132
8133 if (page_object == vm_submap_object) {
7134 panic("vm_page_check_pageable_safe: trying to add page " \
8135 "from submap object (%p) to pageable queue", vm_submap_object);
8136 }
8137 }
8138
8139 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8140 * wired page diagnose
8141 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8142
8143 #include <libkern/OSKextLibPrivate.h>
8144
8145 #define KA_SIZE(namelen, subtotalscount) \
8146 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8147
8148 #define KA_NAME(alloc) \
8149 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8150
8151 #define KA_NAME_LEN(alloc) \
8152 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
8153
8154 vm_tag_t
8155 vm_tag_bt(void)
8156 {
8157 uintptr_t* frameptr;
8158 uintptr_t* frameptr_next;
8159 uintptr_t retaddr;
8160 uintptr_t kstackb, kstackt;
8161 const vm_allocation_site_t * site;
8162 thread_t cthread;
8163 kern_allocation_name_t name;
8164
8165 cthread = current_thread();
8166 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8167
8168 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8169 {
8170 if (!name->tag) vm_tag_alloc(name);
8171 return name->tag;
8172 }
8173
8174 kstackb = cthread->kernel_stack;
8175 kstackt = kstackb + kernel_stack_size;
8176
8177 /* Load stack frame pointer (EBP on x86) into frameptr */
8178 frameptr = __builtin_frame_address(0);
8179 site = NULL;
8180 while (frameptr != NULL)
8181 {
8182 /* Verify thread stack bounds */
8183 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8184
8185 /* Next frame pointer is pointed to by the previous one */
8186 frameptr_next = (uintptr_t*) *frameptr;
8187
8188 /* Pull return address from one spot above the frame pointer */
8189 retaddr = *(frameptr + 1);
8190
8191
8192 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8193 {
8194 site = OSKextGetAllocationSiteForCaller(retaddr);
8195 break;
8196 }
8197 frameptr = frameptr_next;
8198 }
8199
8200 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8201 }
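
/*
 * A user-space sketch of the frame-pointer walk used by vm_tag_bt():
 * each frame stores the caller's frame pointer at fp[0] and the return
 * address at fp[1].  This assumes the compiler keeps frame pointers
 * (-fno-omit-frame-pointer) and bounds the walk by a fixed depth plus a
 * "stacks grow down" check instead of the kernel stack limits; all
 * names are hypothetical and this is illustration only, not part of
 * the build.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static void __attribute__((noinline))
print_backtrace(void)
{
	uintptr_t *fp = __builtin_frame_address(0);

	for (int depth = 0; fp != NULL && depth < 8; depth++) {
		uintptr_t *next_fp = (uintptr_t *) fp[0];	/* saved caller frame pointer */
		uintptr_t  retaddr = fp[1];			/* saved return address */

		if (retaddr == 0)
			break;
		printf("frame %d: return address %p\n", depth, (void *) retaddr);

		if (next_fp <= fp)	/* caller frames live at higher addresses */
			break;
		fp = next_fp;
	}
}
#endif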
8202
8203 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8204
8205 void
8206 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8207 {
8208 vm_tag_t tag;
8209 uint64_t avail;
8210 uint32_t idx;
8211 vm_allocation_site_t * prev;
8212
8213 if (site->tag) return;
8214
8215 idx = 0;
8216 while (TRUE)
8217 {
8218 avail = free_tag_bits[idx];
8219 if (avail)
8220 {
8221 tag = __builtin_clzll(avail);
8222 avail &= ~(1ULL << (63 - tag));
8223 free_tag_bits[idx] = avail;
8224 tag += (idx << 6);
8225 break;
8226 }
8227 idx++;
8228 if (idx >= ARRAY_COUNT(free_tag_bits))
8229 {
8230 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8231 {
8232 prev = vm_allocation_sites[idx];
8233 if (!prev) continue;
8234 if (!KA_NAME_LEN(prev)) continue;
8235 if (!prev->tag) continue;
8236 if (prev->total) continue;
8237 if (1 != prev->refcount) continue;
8238
8239 assert(idx == prev->tag);
8240 tag = idx;
8241 prev->tag = VM_KERN_MEMORY_NONE;
8242 *releasesiteP = prev;
8243 break;
8244 }
8245 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8246 {
8247 tag = VM_KERN_MEMORY_ANY;
8248 }
8249 break;
8250 }
8251 }
8252 site->tag = tag;
8253
8254 OSAddAtomic16(1, &site->refcount);
8255
8256 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8257
8258 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8259 }
8260
8261 static void
8262 vm_tag_free_locked(vm_tag_t tag)
8263 {
8264 uint64_t avail;
8265 uint32_t idx;
8266 uint64_t bit;
8267
8268 if (VM_KERN_MEMORY_ANY == tag) return;
8269
8270 idx = (tag >> 6);
8271 avail = free_tag_bits[idx];
8272 tag &= 63;
8273 bit = (1ULL << (63 - tag));
8274 assert(!(avail & bit));
8275 free_tag_bits[idx] = (avail | bit);
8276 }
8277
8278 static void
8279 vm_tag_init(void)
8280 {
8281 vm_tag_t tag;
8282 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8283 {
8284 vm_tag_free_locked(tag);
8285 }
8286
8287 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8288 {
8289 vm_tag_free_locked(tag);
8290 }
8291 }
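
/*
 * The free_tag_bits[] words above treat bit 63 of each word as the
 * lowest-numbered tag in that word, which is why __builtin_clzll()
 * picks the smallest free tag.  A minimal standalone sketch of that
 * bitmap allocator with hypothetical names; illustration only, not
 * part of the build.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define SKETCH_MAX_IDS 128

static uint64_t sketch_free_bits[SKETCH_MAX_IDS / 64];

static void
sketch_id_free(uint32_t id)
{
	uint64_t bit = 1ULL << (63 - (id & 63));

	assert(id < SKETCH_MAX_IDS);
	assert(!(sketch_free_bits[id >> 6] & bit));	/* must not already be free */
	sketch_free_bits[id >> 6] |= bit;
}

static int
sketch_id_alloc(uint32_t *out)
{
	for (uint32_t idx = 0; idx < SKETCH_MAX_IDS / 64; idx++) {
		uint64_t avail = sketch_free_bits[idx];

		if (avail) {
			uint32_t id = (uint32_t) __builtin_clzll(avail);	/* highest set bit == smallest id */
			sketch_free_bits[idx] = avail & ~(1ULL << (63 - id));
			*out = id + (idx << 6);
			return 1;
		}
	}
	return 0;	/* exhausted */
}
#endif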
8292
8293 vm_tag_t
8294 vm_tag_alloc(vm_allocation_site_t * site)
8295 {
8296 vm_tag_t tag;
8297 vm_allocation_site_t * releasesite;
8298
8299 if (VM_TAG_BT & site->flags)
8300 {
8301 tag = vm_tag_bt();
8302 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8303 }
8304
8305 if (!site->tag)
8306 {
8307 releasesite = NULL;
8308 lck_spin_lock(&vm_allocation_sites_lock);
8309 vm_tag_alloc_locked(site, &releasesite);
8310 lck_spin_unlock(&vm_allocation_sites_lock);
8311 if (releasesite) kern_allocation_name_release(releasesite);
8312 }
8313
8314 return (site->tag);
8315 }
8316
8317 void
8318 vm_tag_update_size(vm_tag_t tag, int64_t delta)
8319 {
8320 vm_allocation_site_t * allocation;
8321 uint64_t prior;
8322
8323 assert(VM_KERN_MEMORY_NONE != tag);
8324 assert(tag < VM_MAX_TAG_VALUE);
8325
8326 allocation = vm_allocation_sites[tag];
8327 assert(allocation);
8328
8329 if (delta < 0) {
8330 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8331 }
8332 prior = OSAddAtomic64(delta, &allocation->total);
8333
8334 #if DEBUG || DEVELOPMENT
8335
8336 uint64_t new, peak;
8337 new = prior + delta;
8338 do
8339 {
8340 peak = allocation->peak;
8341 if (new <= peak) break;
8342 }
8343 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8344
8345 #endif /* DEBUG || DEVELOPMENT */
8346
8347 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8348
8349 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8350 }
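
/*
 * The peak tracking above is a lock-free monotonic max: reread the
 * current peak and compare-and-swap in the new total until either the
 * swap succeeds or someone else has already published a larger value.
 * A minimal sketch of the same pattern with C11 atomics and
 * hypothetical names; illustration only, not part of the build.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t sketch_total;
static _Atomic uint64_t sketch_peak;

static void
sketch_update_size(int64_t delta)
{
	uint64_t prior = atomic_fetch_add(&sketch_total, (uint64_t) delta);
	uint64_t now = prior + (uint64_t) delta;
	uint64_t seen = atomic_load(&sketch_peak);

	/* a failed CAS reloads 'seen'; retry only while we are still above it */
	while (now > seen &&
	       !atomic_compare_exchange_weak(&sketch_peak, &seen, now))
		;
}
#endif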
8351
8352 void
8353 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8354 {
8355 uint64_t prior;
8356
8357 if (delta < 0) {
8358 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8359 }
8360 prior = OSAddAtomic64(delta, &allocation->total);
8361
8362 #if DEBUG || DEVELOPMENT
8363
8364 uint64_t new, peak;
8365 new = prior + delta;
8366 do
8367 {
8368 peak = allocation->peak;
8369 if (new <= peak) break;
8370 }
8371 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8372
8373 #endif /* DEBUG || DEVELOPMENT */
8374
8375 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8376 }
8377
8378 #if VM_MAX_TAG_ZONES
8379
8380 void
8381 vm_allocation_zones_init(void)
8382 {
8383 kern_return_t ret;
8384 vm_offset_t addr;
8385 vm_size_t size;
8386
8387 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8388 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8389
8390 ret = kernel_memory_allocate(kernel_map,
8391 &addr, round_page(size), 0,
8392 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8393 assert(KERN_SUCCESS == ret);
8394
8395 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8396 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8397
8398 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8399 // in vm_tag_update_zone_size() won't recurse
8400 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8401 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8402 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8403 }
8404
8405 void
8406 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8407 {
8408 vm_allocation_zone_total_t * zone;
8409
8410 assert(VM_KERN_MEMORY_NONE != tag);
8411 assert(tag < VM_MAX_TAG_VALUE);
8412
8413 if (zidx >= VM_MAX_TAG_ZONES) return;
8414
8415 zone = vm_allocation_zone_totals[tag];
8416 if (!zone)
8417 {
8418 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8419 if (!zone) return;
8420 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8421 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8422 {
8423 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8424 }
8425 }
8426 }
8427
8428 void
8429 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8430 {
8431 vm_allocation_zone_total_t * zone;
8432 uint32_t new;
8433
8434 assert(VM_KERN_MEMORY_NONE != tag);
8435 assert(tag < VM_MAX_TAG_VALUE);
8436
8437 if (zidx >= VM_MAX_TAG_ZONES) return;
8438
8439 zone = vm_allocation_zone_totals[tag];
8440 assert(zone);
8441 zone += zidx;
8442
8443 /* the zone is locked */
8444 if (delta < 0)
8445 {
8446 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8447 zone->total += delta;
8448 }
8449 else
8450 {
8451 zone->total += delta;
8452 if (zone->total > zone->peak) zone->peak = zone->total;
8453 if (dwaste)
8454 {
8455 new = zone->waste;
8456 if (zone->wastediv < 65536) zone->wastediv++;
8457 else new -= (new >> 16);
8458 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8459 assert(!ov);
8460 zone->waste = new;
8461 }
8462 }
8463 }
8464
8465 #endif /* VM_MAX_TAG_ZONES */
8466
8467 void
8468 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8469 {
8470 kern_allocation_name_t other;
8471 struct vm_allocation_total * total;
8472 uint32_t subidx;
8473
8474 subidx = 0;
8475 assert(VM_KERN_MEMORY_NONE != subtag);
8476 for (; subidx < allocation->subtotalscount; subidx++)
8477 {
8478 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8479 {
8480 allocation->subtotals[subidx].tag = subtag;
8481 break;
8482 }
8483 if (subtag == allocation->subtotals[subidx].tag) break;
8484 }
8485 assert(subidx < allocation->subtotalscount);
8486 if (subidx >= allocation->subtotalscount) return;
8487
8488 total = &allocation->subtotals[subidx];
8489 other = vm_allocation_sites[subtag];
8490 assert(other);
8491
8492 if (delta < 0)
8493 {
8494 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8495 OSAddAtomic64(delta, &total->total);
8496 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8497 OSAddAtomic64(delta, &other->mapped);
8498 }
8499 else
8500 {
8501 OSAddAtomic64(delta, &other->mapped);
8502 OSAddAtomic64(delta, &total->total);
8503 }
8504 }
8505
8506 const char *
8507 kern_allocation_get_name(kern_allocation_name_t allocation)
8508 {
8509 return (KA_NAME(allocation));
8510 }
8511
8512 kern_allocation_name_t
8513 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8514 {
8515 uint32_t namelen;
8516
8517 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8518
8519 kern_allocation_name_t allocation;
8520 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8521 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8522
8523 allocation->refcount = 1;
8524 allocation->subtotalscount = subtotalscount;
8525 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8526 strlcpy(KA_NAME(allocation), name, namelen + 1);
8527
8528 return (allocation);
8529 }
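
/*
 * KA_SIZE()/KA_NAME() above pack the header, the subtotals array and
 * the NUL-terminated name into a single allocation, with the name
 * stored immediately after the last subtotal.  A minimal user-space
 * sketch of that layout using a flexible array member; the names are
 * hypothetical and this is illustration only, not part of the build.
 */
#if 0
#include <stdlib.h>
#include <string.h>

struct sketch_subtotal {
	unsigned int	tag;
	long long	total;
};

struct sketch_alloc_name {
	unsigned int		refcount;
	unsigned int		subtotalscount;
	struct sketch_subtotal	subtotals[];	/* the name string follows this array */
};

#define SKETCH_KA_SIZE(namelen, nsubs) \
	(sizeof(struct sketch_alloc_name) + (nsubs) * sizeof(struct sketch_subtotal) + (namelen) + 1)

#define SKETCH_KA_NAME(a) \
	((char *) &(a)->subtotals[(a)->subtotalscount])

static struct sketch_alloc_name *
sketch_name_allocate(const char *name, unsigned int nsubs)
{
	size_t namelen = strlen(name);
	struct sketch_alloc_name *a = calloc(1, SKETCH_KA_SIZE(namelen, nsubs));

	if (a == NULL)
		return NULL;
	a->refcount = 1;
	a->subtotalscount = nsubs;
	memcpy(SKETCH_KA_NAME(a), name, namelen + 1);	/* copy including the NUL */
	return a;
}
#endif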
8530
8531 void
8532 kern_allocation_name_release(kern_allocation_name_t allocation)
8533 {
8534 assert(allocation->refcount > 0);
8535 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8536 {
8537 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8538 }
8539 }
8540
8541 vm_tag_t
8542 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8543 {
8544 return (vm_tag_alloc(allocation));
8545 }
8546
8547 static void
8548 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8549 {
8550 if (!object->wired_page_count) return;
8551 if (object != kernel_object)
8552 {
8553 assert(object->wire_tag < num_info);
8554 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8555 }
8556 }
8557
8558 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8559 unsigned int num_info, vm_object_t object);
8560
8561 static void
8562 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8563 vm_page_iterate_proc proc, purgeable_q_t queue,
8564 int group)
8565 {
8566 vm_object_t object;
8567
8568 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8569 !queue_end(&queue->objq[group], (queue_entry_t) object);
8570 object = (vm_object_t) queue_next(&object->objq))
8571 {
8572 proc(info, num_info, object);
8573 }
8574 }
8575
8576 static void
8577 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8578 vm_page_iterate_proc proc)
8579 {
8580 purgeable_q_t volatile_q;
8581 queue_head_t * nonvolatile_q;
8582 vm_object_t object;
8583 int group;
8584
8585 lck_spin_lock(&vm_objects_wired_lock);
8586 queue_iterate(&vm_objects_wired,
8587 object,
8588 vm_object_t,
8589 objq)
8590 {
8591 proc(info, num_info, object);
8592 }
8593 lck_spin_unlock(&vm_objects_wired_lock);
8594
8595 lck_mtx_lock(&vm_purgeable_queue_lock);
8596 nonvolatile_q = &purgeable_nonvolatile_queue;
8597 for (object = (vm_object_t) queue_first(nonvolatile_q);
8598 !queue_end(nonvolatile_q, (queue_entry_t) object);
8599 object = (vm_object_t) queue_next(&object->objq))
8600 {
8601 proc(info, num_info, object);
8602 }
8603
8604 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
8605 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, 0);
8606
8607 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
8608 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8609 {
8610 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8611 }
8612
8613 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
8614 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8615 {
8616 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8617 }
8618 lck_mtx_unlock(&vm_purgeable_queue_lock);
8619 }
8620
8621 static uint64_t
8622 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8623 {
8624 size_t namelen;
8625 unsigned int idx, count, nextinfo;
8626 vm_allocation_site_t * site;
8627 lck_spin_lock(&vm_allocation_sites_lock);
8628
8629 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8630 {
8631 site = vm_allocation_sites[idx];
8632 if (!site) continue;
8633 info[idx].mapped = site->mapped;
8634 info[idx].tag = site->tag;
8635 if (!iterated)
8636 {
8637 info[idx].size = site->total;
8638 #if DEBUG || DEVELOPMENT
8639 info[idx].peak = site->peak;
8640 #endif /* DEBUG || DEVELOPMENT */
8641 }
8642 else
8643 {
8644 if (!site->subtotalscount && (site->total != info[idx].size))
8645 {
8646 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8647 info[idx].size = site->total;
8648 }
8649 }
8650 }
8651
8652 nextinfo = (vm_allocation_tag_highest + 1);
8653 count = nextinfo;
8654 if (count >= num_info) count = num_info;
8655
8656 for (idx = 0; idx < count; idx++)
8657 {
8658 site = vm_allocation_sites[idx];
8659 if (!site) continue;
8660 info[idx].flags |= VM_KERN_SITE_WIRED;
8661 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8662 {
8663 info[idx].site = idx;
8664 info[idx].flags |= VM_KERN_SITE_TAG;
8665 if (VM_KERN_MEMORY_ZONE == idx)
8666 {
8667 info[idx].flags |= VM_KERN_SITE_HIDE;
8668 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8669 info[idx].collectable_bytes = zones_collectable_bytes;
8670 }
8671 }
8672 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8673 {
8674 info[idx].site = 0;
8675 info[idx].flags |= VM_KERN_SITE_NAMED;
8676 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8677 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8678 }
8679 else if (VM_TAG_KMOD & site->flags)
8680 {
8681 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8682 info[idx].flags |= VM_KERN_SITE_KMOD;
8683 }
8684 else
8685 {
8686 info[idx].site = VM_KERNEL_UNSLIDE(site);
8687 info[idx].flags |= VM_KERN_SITE_KERNEL;
8688 }
8689 #if VM_MAX_TAG_ZONES
8690 vm_allocation_zone_total_t * zone;
8691 unsigned int zidx;
8692 vm_size_t elem_size;
8693
8694 if (vm_allocation_zone_totals
8695 && (zone = vm_allocation_zone_totals[idx])
8696 && (nextinfo < num_info))
8697 {
8698 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8699 {
8700 if (!zone[zidx].peak) continue;
8701 info[nextinfo] = info[idx];
8702 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8703 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8704 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8705 info[nextinfo].size = zone[zidx].total;
8706 info[nextinfo].peak = zone[zidx].peak;
8707 info[nextinfo].mapped = 0;
8708 if (zone[zidx].wastediv)
8709 {
8710 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8711 }
8712 nextinfo++;
8713 }
8714 }
8715 #endif /* VM_MAX_TAG_ZONES */
8716 if (site->subtotalscount)
8717 {
8718 uint64_t mapped, mapcost, take;
8719 uint32_t sub;
8720 vm_tag_t alloctag;
8721
8722 info[idx].size = site->total;
8723 mapped = info[idx].size;
8724 info[idx].mapped = mapped;
8725 mapcost = 0;
8726 for (sub = 0; sub < site->subtotalscount; sub++)
8727 {
8728 alloctag = site->subtotals[sub].tag;
8729 assert(alloctag < num_info);
8730 if (info[alloctag].name[0]) continue;
8731 take = info[alloctag].mapped;
8732 if (take > info[alloctag].size) take = info[alloctag].size;
8733 if (take > mapped) take = mapped;
8734 info[alloctag].mapped -= take;
8735 info[alloctag].size -= take;
8736 mapped -= take;
8737 mapcost += take;
8738 }
8739 info[idx].size = mapcost;
8740 }
8741 }
8742 lck_spin_unlock(&vm_allocation_sites_lock);
8743
8744 return (0);
8745 }
8746
8747 uint32_t
8748 vm_page_diagnose_estimate(void)
8749 {
8750 vm_allocation_site_t * site;
8751 uint32_t count;
8752 uint32_t idx;
8753
8754 lck_spin_lock(&vm_allocation_sites_lock);
8755 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8756 {
8757 site = vm_allocation_sites[idx];
8758 if (!site) continue;
8759 count++;
8760 #if VM_MAX_TAG_ZONES
8761 if (vm_allocation_zone_totals)
8762 {
8763 vm_allocation_zone_total_t * zone;
8764 zone = vm_allocation_zone_totals[idx];
8765 if (!zone) continue;
8766 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8767 }
8768 #endif
8769 }
8770 lck_spin_unlock(&vm_allocation_sites_lock);
8771
8772 /* some slop for new tags created */
8773 count += 8;
8774 count += VM_KERN_COUNTER_COUNT;
8775
8776 return (count);
8777 }
8778
8779
8780 kern_return_t
8781 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8782 {
8783 uint64_t wired_size;
8784 uint64_t wired_managed_size;
8785 uint64_t wired_reserved_size;
8786 uint64_t booter_size;
8787 boolean_t iterate;
8788 mach_memory_info_t * counts;
8789
8790 bzero(info, num_info * sizeof(mach_memory_info_t));
8791
8792 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8793
8794 #if CONFIG_EMBEDDED
8795 wired_size = ptoa_64(vm_page_wire_count);
8796 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8797 #else
8798 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8799 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8800 #endif
8801 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8802
8803 booter_size = ml_get_booter_memory_size();
8804 wired_size += booter_size;
8805
8806 assert(num_info >= VM_KERN_COUNTER_COUNT);
8807 num_info -= VM_KERN_COUNTER_COUNT;
8808 counts = &info[num_info];
8809
8810 #define SET_COUNT(xcount, xsize, xflags) \
8811 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8812 counts[xcount].site = (xcount); \
8813 counts[xcount].size = (xsize); \
8814 counts[xcount].mapped = (xsize); \
8815 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8816
8817 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8818 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8819 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8820 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8821 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8822 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8823 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8824 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8825
8826 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8827 counts[xcount].site = (xcount); \
8828 counts[xcount].size = (xsize); \
8829 counts[xcount].mapped = (xsize); \
8830 counts[xcount].free = (xfree); \
8831 counts[xcount].largest = (xlargest); \
8832 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8833
8834 vm_map_size_t map_size, map_free, map_largest;
8835
8836 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
8837 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
8838
8839 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
8840 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
8841
8842 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
8843 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
8844
8845 iterate = !VM_TAG_ACTIVE_UPDATE;
8846 if (iterate)
8847 {
8848 enum { kMaxKernelDepth = 1 };
8849 vm_map_t maps [kMaxKernelDepth];
8850 vm_map_entry_t entries[kMaxKernelDepth];
8851 vm_map_t map;
8852 vm_map_entry_t entry;
8853 vm_object_offset_t offset;
8854 vm_page_t page;
8855 int stackIdx, count;
8856
8857 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
8858
8859 map = kernel_map;
8860 stackIdx = 0;
8861 while (map)
8862 {
8863 vm_map_lock(map);
8864 for (entry = map->hdr.links.next; map; entry = entry->links.next)
8865 {
8866 if (entry->is_sub_map)
8867 {
8868 assert(stackIdx < kMaxKernelDepth);
8869 maps[stackIdx] = map;
8870 entries[stackIdx] = entry;
8871 stackIdx++;
8872 map = VME_SUBMAP(entry);
8873 entry = NULL;
8874 break;
8875 }
8876 if (VME_OBJECT(entry) == kernel_object)
8877 {
8878 count = 0;
8879 vm_object_lock(VME_OBJECT(entry));
8880 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
8881 {
8882 page = vm_page_lookup(VME_OBJECT(entry), offset);
8883 if (page && VM_PAGE_WIRED(page)) count++;
8884 }
8885 vm_object_unlock(VME_OBJECT(entry));
8886
8887 if (count)
8888 {
8889 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
8890 assert(VME_ALIAS(entry) < num_info);
8891 info[VME_ALIAS(entry)].size += ptoa_64(count);
8892 }
8893 }
8894 while (map && (entry == vm_map_last_entry(map)))
8895 {
8896 vm_map_unlock(map);
8897 if (!stackIdx) map = NULL;
8898 else
8899 {
8900 --stackIdx;
8901 map = maps[stackIdx];
8902 entry = entries[stackIdx];
8903 }
8904 }
8905 }
8906 }
8907 }
8908
8909 process_account(info, num_info, zones_collectable_bytes, iterate);
8910
8911 return (KERN_SUCCESS);
8912 }
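
/*
 * The iteration in vm_page_diagnose() descends one level into submaps
 * with a small explicit stack (kMaxKernelDepth) instead of recursing.
 * A minimal sketch of that pattern over a toy nested "map" (an array
 * of entries terminated by a NULL name); all names are hypothetical
 * and this is illustration only, not part of the build.
 */
#if 0
#include <stdio.h>

struct sketch_entry {
	const char		*name;		/* NULL terminates a map */
	struct sketch_entry	*submap;	/* non-NULL: descend into this nested map */
};

#define SKETCH_MAX_DEPTH 1			/* mirrors kMaxKernelDepth */

static void
sketch_walk_map(struct sketch_entry *top)
{
	struct sketch_entry	*stack[SKETCH_MAX_DEPTH];
	int			 depth = 0;
	struct sketch_entry	*e = top;

	while (e != NULL) {
		if (e->name == NULL) {			/* end of the current map */
			e = (depth > 0) ? stack[--depth] + 1 : NULL;	/* pop, resume after the submap entry */
			continue;
		}
		if (e->submap != NULL && depth < SKETCH_MAX_DEPTH) {
			stack[depth++] = e;		/* remember where to come back to */
			e = e->submap;			/* descend */
			continue;
		}
		printf("entry: %s\n", e->name);		/* visit a leaf entry */
		e++;
	}
}
#endif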
8913
8914 #if DEBUG || DEVELOPMENT
8915
8916 kern_return_t
8917 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
8918 {
8919 kern_return_t ret;
8920 vm_size_t zsize;
8921 vm_map_t map;
8922 vm_map_entry_t entry;
8923
8924 zsize = zone_element_info((void *) addr, tag);
8925 if (zsize)
8926 {
8927 *zone_size = *size = zsize;
8928 return (KERN_SUCCESS);
8929 }
8930
8931 *zone_size = 0;
8932 ret = KERN_INVALID_ADDRESS;
8933 for (map = kernel_map; map; )
8934 {
8935 vm_map_lock(map);
8936 if (!vm_map_lookup_entry(map, addr, &entry)) break;
8937 if (entry->is_sub_map)
8938 {
8939 if (map != kernel_map) break;
8940 map = VME_SUBMAP(entry);
8941 continue;
8942 }
8943 if (entry->vme_start != addr) break;
8944 *tag = VME_ALIAS(entry);
8945 *size = (entry->vme_end - addr);
8946 ret = KERN_SUCCESS;
8947 break;
8948 }
8949 if (map != kernel_map) vm_map_unlock(map);
8950 vm_map_unlock(kernel_map);
8951
8952 return (ret);
8953 }
8954
8955 #endif /* DEBUG || DEVELOPMENT */
8956
8957 uint32_t
8958 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8959 {
8960 vm_allocation_site_t * site;
8961 uint32_t kmodId;
8962
8963 kmodId = 0;
8964 lck_spin_lock(&vm_allocation_sites_lock);
8965 if ((site = vm_allocation_sites[tag]))
8966 {
8967 if (VM_TAG_KMOD & site->flags)
8968 {
8969 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8970 }
8971 }
8972 lck_spin_unlock(&vm_allocation_sites_lock);
8973
8974 return (kmodId);
8975 }