1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93 #include <san/kasan.h>
94
95 #include <vm/vm_protos.h>
96 #include <vm/memory_object.h>
97 #include <vm/vm_purgeable_internal.h>
98 #include <vm/vm_compressor.h>
99
100 #if CONFIG_PHANTOM_CACHE
101 #include <vm/vm_phantom_cache.h>
102 #endif
103
104 #include <IOKit/IOHibernatePrivate.h>
105
106 #include <sys/kdebug.h>
107
108
109 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113
114 #if CONFIG_SECLUDED_MEMORY
115 struct vm_page_secluded_data vm_page_secluded;
116 #endif /* CONFIG_SECLUDED_MEMORY */
117
118 boolean_t hibernate_cleaning_in_progress = FALSE;
119 boolean_t vm_page_free_verify = TRUE;
120
121 uint32_t vm_lopage_free_count = 0;
122 uint32_t vm_lopage_free_limit = 0;
123 uint32_t vm_lopage_lowater = 0;
124 boolean_t vm_lopage_refill = FALSE;
125 boolean_t vm_lopage_needed = FALSE;
126
127 lck_mtx_ext_t vm_page_queue_lock_ext;
128 lck_mtx_ext_t vm_page_queue_free_lock_ext;
129 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
130
131 int speculative_age_index = 0;
132 int speculative_steal_index = 0;
133 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
134
135
136 __private_extern__ void vm_page_init_lck_grp(void);
137
138 static void vm_page_free_prepare(vm_page_t page);
139 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
140
141 static void vm_tag_init(void);
142
143 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
144 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
145 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
146
147 /*
148 * Associated with each page of user-allocatable memory is a
149 * page structure.
150 */
151
152 /*
153 * These variables record the values returned by vm_page_bootstrap,
154 * for debugging purposes. The implementation of pmap_steal_memory
155 * and pmap_startup here also uses them internally.
156 */
157
158 vm_offset_t virtual_space_start;
159 vm_offset_t virtual_space_end;
160 uint32_t vm_page_pages;
161
162 /*
163 * The vm_page_lookup() routine, which provides for fast
164 * (virtual memory object, offset) to page lookup, employs
165 * the following hash table. The vm_page_{insert,remove}
166 * routines install and remove associations in the table.
167 * [This table is often called the virtual-to-physical,
168 * or VP, table.]
169 */
170 typedef struct {
171 vm_page_packed_t page_list;
172 #if MACH_PAGE_HASH_STATS
173 int cur_count; /* current count */
174 int hi_count; /* high water mark */
175 #endif /* MACH_PAGE_HASH_STATS */
176 } vm_page_bucket_t;
177
178
179 #define BUCKETS_PER_LOCK 16
180
181 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
182 unsigned int vm_page_bucket_count = 0; /* How big is array? */
183 unsigned int vm_page_hash_mask; /* Mask for hash function */
184 unsigned int vm_page_hash_shift; /* Shift for hash function */
185 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
186 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
187
188 #ifndef VM_TAG_ACTIVE_UPDATE
189 #error VM_TAG_ACTIVE_UPDATE
190 #endif
191 #ifndef VM_MAX_TAG_ZONES
192 #error VM_MAX_TAG_ZONES
193 #endif
194
195 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
196 lck_spin_t *vm_page_bucket_locks;
197 lck_spin_t vm_objects_wired_lock;
198 lck_spin_t vm_allocation_sites_lock;
199
200 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
201 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
202 #if VM_MAX_TAG_ZONES
203 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
204 #endif /* VM_MAX_TAG_ZONES */
205
206 vm_tag_t vm_allocation_tag_highest;
207
208 #if VM_PAGE_BUCKETS_CHECK
209 boolean_t vm_page_buckets_check_ready = FALSE;
210 #if VM_PAGE_FAKE_BUCKETS
211 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
212 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
213 #endif /* VM_PAGE_FAKE_BUCKETS */
214 #endif /* VM_PAGE_BUCKETS_CHECK */
215
216
217
218 #if MACH_PAGE_HASH_STATS
219 /* This routine is only for debug. It is intended to be called by
220 * hand by a developer using a kernel debugger. This routine prints
221 * out vm_page_hash table statistics to the kernel debug console.
222 */
223 void
224 hash_debug(void)
225 {
226 int i;
227 int numbuckets = 0;
228 int highsum = 0;
229 int maxdepth = 0;
230
231 for (i = 0; i < vm_page_bucket_count; i++) {
232 if (vm_page_buckets[i].hi_count) {
233 numbuckets++;
234 highsum += vm_page_buckets[i].hi_count;
235 if (vm_page_buckets[i].hi_count > maxdepth)
236 maxdepth = vm_page_buckets[i].hi_count;
237 }
238 }
239 printf("Total number of buckets: %d\n", vm_page_bucket_count);
240 printf("Number used buckets: %d = %d%%\n",
241 numbuckets, 100*numbuckets/vm_page_bucket_count);
242 printf("Number unused buckets: %d = %d%%\n",
243 vm_page_bucket_count - numbuckets,
244 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
245 printf("Sum of bucket max depth: %d\n", highsum);
246 printf("Average bucket depth: %d.%2d\n",
247 highsum/vm_page_bucket_count,
248 highsum%vm_page_bucket_count);
249 printf("Maximum bucket depth: %d\n", maxdepth);
250 }
251 #endif /* MACH_PAGE_HASH_STATS */
252
253 /*
254 * The virtual page size is currently implemented as a runtime
255 * variable, but is constant once initialized using vm_set_page_size.
256 * This initialization must be done in the machine-dependent
257 * bootstrap sequence, before calling other machine-independent
258 * initializations.
259 *
260 * All references to the virtual page size outside this
261 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
262 * constants.
263 */
264 #if defined(__arm__) || defined(__arm64__)
265 vm_size_t page_size;
266 vm_size_t page_mask;
267 int page_shift;
268 #else
269 vm_size_t page_size = PAGE_SIZE;
270 vm_size_t page_mask = PAGE_MASK;
271 int page_shift = PAGE_SHIFT;
272 #endif
273
274 /*
275 * Resident page structures are initialized from
276 * a template (see vm_page_alloc).
277 *
278 * When adding a new field to the virtual memory
279 * page structure (struct vm_page), be sure to add initialization
280 * (see vm_page_bootstrap).
281 */
282 struct vm_page vm_page_template;
283
284 vm_page_t vm_pages = VM_PAGE_NULL;
285 vm_page_t vm_page_array_beginning_addr;
286 vm_page_t vm_page_array_ending_addr;
287 vm_page_t vm_page_array_boundary;
288
289 unsigned int vm_pages_count = 0;
290 ppnum_t vm_page_lowest = 0;
291
292 /*
293 * Resident pages that represent real memory
294 * are allocated from a set of free lists,
295 * one per color.
296 */
297 unsigned int vm_colors;
298 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
299 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
300 unsigned int vm_free_magazine_refill_limit = 0;
301
302
303 struct vm_page_queue_free_head {
304 vm_page_queue_head_t qhead;
305 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
306
307 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
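/*
 * Illustrative note (not part of the original source): a page's color
 * is derived from its physical page number, so selecting the free list
 * for a page conceptually looks like the hypothetical fragment below.
 * The real free and grab paths add further handling (the low-memory
 * pool, clumping, magazine refill limits) on top of this.
 *
 *	color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
 *	vm_page_queue_enter(&vm_page_queue_free[color].qhead,
 *			    mem, vm_page_t, pageq);
 */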
308
309
310 unsigned int vm_page_free_wanted;
311 unsigned int vm_page_free_wanted_privileged;
312 #if CONFIG_SECLUDED_MEMORY
313 unsigned int vm_page_free_wanted_secluded;
314 #endif /* CONFIG_SECLUDED_MEMORY */
315 unsigned int vm_page_free_count;
316
317 /*
318 * Occasionally, the virtual memory system uses
319 * resident page structures that do not refer to
320 * real pages, for example to leave a page with
321 * important state information in the VP table.
322 *
323 * These page structures are allocated the way
324 * most other kernel structures are.
325 */
326 zone_t vm_page_array_zone;
327 zone_t vm_page_zone;
328 vm_locks_array_t vm_page_locks;
329 decl_lck_mtx_data(,vm_page_alloc_lock)
330 lck_mtx_ext_t vm_page_alloc_lock_ext;
331
332 unsigned int io_throttle_zero_fill;
333
334 unsigned int vm_page_local_q_count = 0;
335 unsigned int vm_page_local_q_soft_limit = 250;
336 unsigned int vm_page_local_q_hard_limit = 500;
337 struct vplq *vm_page_local_q = NULL;
338
339 /* N.B. Guard and fictitious pages must not
340 * be assigned a zero phys_page value.
341 */
342 /*
343 * Fictitious pages don't have a physical address,
344 * but we must initialize phys_page to something.
345 * For debugging, this should be a strange value
346 * that the pmap module can recognize in assertions.
347 */
348 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
349
350 /*
351 * Guard pages are not accessible so they don't
352 * need a physical address, but we need to enter
353 * one in the pmap.
354 * Let's make it recognizable and make sure that
355 * we don't use a real physical page with that
356 * physical address.
357 */
358 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
359
360 /*
361 * Resident page structures are also chained on
362 * queues that are used by the page replacement
363 * system (pageout daemon). These queues are
364 * defined here, but are shared by the pageout
365 * module. The inactive queue is broken into
366 * file-backed and anonymous queues for convenience, as the
367 * pageout daemon often assigns a higher
368 * importance to anonymous pages (it is less likely to pick them).
369 */
370 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
371 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
372 #if CONFIG_SECLUDED_MEMORY
373 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374 #endif /* CONFIG_SECLUDED_MEMORY */
375 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
376 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
377
378 queue_head_t vm_objects_wired;
379
380 #if CONFIG_BACKGROUND_QUEUE
381 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
382 uint32_t vm_page_background_target;
383 uint32_t vm_page_background_count;
384 uint64_t vm_page_background_promoted_count;
385
386 uint32_t vm_page_background_internal_count;
387 uint32_t vm_page_background_external_count;
388
389 uint32_t vm_page_background_mode;
390 uint32_t vm_page_background_exclude_external;
391 #endif
392
393 unsigned int vm_page_active_count;
394 unsigned int vm_page_inactive_count;
395 #if CONFIG_SECLUDED_MEMORY
396 unsigned int vm_page_secluded_count;
397 unsigned int vm_page_secluded_count_free;
398 unsigned int vm_page_secluded_count_inuse;
399 #endif /* CONFIG_SECLUDED_MEMORY */
400 unsigned int vm_page_anonymous_count;
401 unsigned int vm_page_throttled_count;
402 unsigned int vm_page_speculative_count;
403
404 unsigned int vm_page_wire_count;
405 unsigned int vm_page_wire_count_on_boot = 0;
406 unsigned int vm_page_stolen_count;
407 unsigned int vm_page_wire_count_initial;
408 unsigned int vm_page_pages_initial;
409 unsigned int vm_page_gobble_count = 0;
410
411 #define VM_PAGE_WIRE_COUNT_WARNING 0
412 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
413
414 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
415 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
416 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
417
418 unsigned int vm_page_xpmapped_external_count = 0;
419 unsigned int vm_page_external_count = 0;
420 unsigned int vm_page_internal_count = 0;
421 unsigned int vm_page_pageable_external_count = 0;
422 unsigned int vm_page_pageable_internal_count = 0;
423
424 #if DEVELOPMENT || DEBUG
425 unsigned int vm_page_speculative_recreated = 0;
426 unsigned int vm_page_speculative_created = 0;
427 unsigned int vm_page_speculative_used = 0;
428 #endif
429
430 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
431
432 unsigned int vm_page_cleaned_count = 0;
433 unsigned int vm_pageout_enqueued_cleaned = 0;
434
435 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
436 ppnum_t max_valid_low_ppnum = 0xffffffff;
437
438
439 /*
440 * Several page replacement parameters are also
441 * shared with this module, so that page allocation
442 * (done here in vm_page_alloc) can trigger the
443 * pageout daemon.
444 */
445 unsigned int vm_page_free_target = 0;
446 unsigned int vm_page_free_min = 0;
447 unsigned int vm_page_throttle_limit = 0;
448 unsigned int vm_page_inactive_target = 0;
449 #if CONFIG_SECLUDED_MEMORY
450 unsigned int vm_page_secluded_target = 0;
451 #endif /* CONFIG_SECLUDED_MEMORY */
452 unsigned int vm_page_anonymous_min = 0;
453 unsigned int vm_page_inactive_min = 0;
454 unsigned int vm_page_free_reserved = 0;
455 unsigned int vm_page_throttle_count = 0;
456
457
458 /*
459 * The VM system has a couple of heuristics for deciding
460 * that pages are "uninteresting" and should be placed
461 * on the inactive queue as likely candidates for replacement.
462 * These variables let the heuristics be controlled at run-time
463 * to make experimentation easier.
464 */
465
466 boolean_t vm_page_deactivate_hint = TRUE;
467
468 struct vm_page_stats_reusable vm_page_stats_reusable;
469
470 /*
471 * vm_set_page_size:
472 *
473 * Sets the page size, perhaps based upon the memory
474 * size. Must be called before any use of page-size
475 * dependent functions.
476 *
477 * Sets page_shift and page_mask from page_size.
478 */
479 void
480 vm_set_page_size(void)
481 {
482 page_size = PAGE_SIZE;
483 page_mask = PAGE_MASK;
484 page_shift = PAGE_SHIFT;
485
486 if ((page_mask & page_size) != 0)
487 panic("vm_set_page_size: page size not a power of two");
488
489 for (page_shift = 0; ; page_shift++)
490 if ((1U << page_shift) == page_size)
491 break;
492 }
493
494 #if defined (__x86_64__)
495
496 #define MAX_CLUMP_SIZE 16
497 #define DEFAULT_CLUMP_SIZE 4
498
499 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
500
501 #if DEVELOPMENT || DEBUG
502 unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
503 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
504
505 static inline void vm_clump_update_stats(unsigned int c) {
506 assert(c<=vm_clump_size);
507 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
508 vm_clump_allocs+=c;
509 }
510 #endif /* if DEVELOPMENT || DEBUG */
511
512 /* Called once to setup the VM clump knobs */
513 static void
514 vm_page_setup_clump( void )
515 {
516 unsigned int override, n;
517
518 vm_clump_size = DEFAULT_CLUMP_SIZE;
519 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
520
521 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
522 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
523 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
524
525 vm_clump_promote_threshold = vm_clump_size;
526 vm_clump_mask = vm_clump_size - 1;
527 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
528
529 #if DEVELOPMENT || DEBUG
530 bzero(vm_clump_stats, sizeof(vm_clump_stats));
531 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
532 #endif /* if DEVELOPMENT || DEBUG */
533 }
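/*
 * Worked example (not part of the original source): with the default
 * clump_size of 4 and no "clump_size" boot-arg override, the setup
 * above yields vm_clump_mask = 3, vm_clump_shift = 2 and
 * vm_clump_promote_threshold = 4.
 */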
534
535 #endif /* #if defined (__x86_64__) */
536
537 #define COLOR_GROUPS_TO_STEAL 4
538
539 /* Called once during startup, once the cache geometry is known.
540 */
541 static void
542 vm_page_set_colors( void )
543 {
544 unsigned int n, override;
545
546 #if defined (__x86_64__)
547 /* adjust #colors because we need to color outside the clump boundary */
548 vm_cache_geometry_colors >>= vm_clump_shift;
549 #endif
550 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
551 n = override;
552 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
553 n = vm_cache_geometry_colors;
554 else n = DEFAULT_COLORS; /* use default if all else fails */
555
556 if ( n == 0 )
557 n = 1;
558 if ( n > MAX_COLORS )
559 n = MAX_COLORS;
560
561 /* the count must be a power of 2 */
562 if ( ( n & (n - 1)) != 0 )
563 n = DEFAULT_COLORS; /* use default if all else fails */
564
565 vm_colors = n;
566 vm_color_mask = n - 1;
567
568 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
569
570 #if defined (__x86_64__)
571 /* adjust for reduction in colors due to clumping and multiple cores */
572 if (real_ncpus)
573 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
574 #endif
575 }
576
577
578 lck_grp_t vm_page_lck_grp_free;
579 lck_grp_t vm_page_lck_grp_queue;
580 lck_grp_t vm_page_lck_grp_local;
581 lck_grp_t vm_page_lck_grp_purge;
582 lck_grp_t vm_page_lck_grp_alloc;
583 lck_grp_t vm_page_lck_grp_bucket;
584 lck_grp_attr_t vm_page_lck_grp_attr;
585 lck_attr_t vm_page_lck_attr;
586
587
588 __private_extern__ void
589 vm_page_init_lck_grp(void)
590 {
591 /*
592 * initialize the vm_page lock world
593 */
594 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
595 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
596 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
601 lck_attr_setdefault(&vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
603
604 vm_compressor_init_locks();
605 }
606
607 void
608 vm_page_init_local_q()
609 {
610 unsigned int num_cpus;
611 unsigned int i;
612 struct vplq *t_local_q;
613
614 num_cpus = ml_get_max_cpus();
615
616 /*
617 * no point in this for a uni-processor system
618 */
619 if (num_cpus >= 2) {
620 #if KASAN
621 /* KASAN breaks the expectation of a size-aligned object by adding a
622 * redzone, so explicitly align. */
623 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
624 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
625 #else
626 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
627 #endif
628
629 for (i = 0; i < num_cpus; i++) {
630 struct vpl *lq;
631
632 lq = &t_local_q[i].vpl_un.vpl;
633 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
634 vm_page_queue_init(&lq->vpl_queue);
635 lq->vpl_count = 0;
636 lq->vpl_internal_count = 0;
637 lq->vpl_external_count = 0;
638 }
639 vm_page_local_q_count = num_cpus;
640
641 vm_page_local_q = (struct vplq *)t_local_q;
642 }
643 }
644
645 /*
646 * vm_init_before_launchd
647 *
648 * This should be called right before launchd is loaded.
649 */
650 void
651 vm_init_before_launchd()
652 {
653 vm_page_wire_count_on_boot = vm_page_wire_count;
654 }
655
656
657 /*
658 * vm_page_bootstrap:
659 *
660 * Initializes the resident memory module.
661 *
662 * Allocates memory for the page cells, and
663 * for the object/offset-to-page hash table headers.
664 * Each page cell is initialized and placed on the free list.
665 * Returns the range of available kernel virtual memory.
666 */
667
668 void
669 vm_page_bootstrap(
670 vm_offset_t *startp,
671 vm_offset_t *endp)
672 {
673 vm_page_t m;
674 unsigned int i;
675 unsigned int log1;
676 unsigned int log2;
677 unsigned int size;
678
679 /*
680 * Initialize the vm_page template.
681 */
682
683 m = &vm_page_template;
684 bzero(m, sizeof (*m));
685
686 #if CONFIG_BACKGROUND_QUEUE
687 m->vm_page_backgroundq.next = 0;
688 m->vm_page_backgroundq.prev = 0;
689 m->vm_page_in_background = FALSE;
690 m->vm_page_on_backgroundq = FALSE;
691 #endif
692
693 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
694 m->listq.next = 0;
695 m->listq.prev = 0;
696 m->next_m = 0;
697
698 m->vm_page_object = 0; /* reset later */
699 m->offset = (vm_object_offset_t) -1; /* reset later */
700
701 m->wire_count = 0;
702 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
703 m->laundry = FALSE;
704 m->reference = FALSE;
705 m->gobbled = FALSE;
706 m->private = FALSE;
707 m->__unused_pageq_bits = 0;
708
709 #if !defined(__arm__) && !defined(__arm64__)
710 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
711 #endif
712 m->busy = TRUE;
713 m->wanted = FALSE;
714 m->tabled = FALSE;
715 m->hashed = FALSE;
716 m->fictitious = FALSE;
717 m->pmapped = FALSE;
718 m->wpmapped = FALSE;
719 m->free_when_done = FALSE;
720 m->absent = FALSE;
721 m->error = FALSE;
722 m->dirty = FALSE;
723 m->cleaning = FALSE;
724 m->precious = FALSE;
725 m->clustered = FALSE;
726 m->overwriting = FALSE;
727 m->restart = FALSE;
728 m->unusual = FALSE;
729 m->cs_validated = FALSE;
730 m->cs_tainted = FALSE;
731 m->cs_nx = FALSE;
732 m->no_cache = FALSE;
733 m->reusable = FALSE;
734 m->slid = FALSE;
735 m->xpmapped = FALSE;
736 m->written_by_kernel = FALSE;
737 m->__unused_object_bits = 0;
738
739 /*
740 * Initialize the page queues.
741 */
742 vm_page_init_lck_grp();
743
744 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
745 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
746 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
747
748 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
749 int group;
750
751 purgeable_queues[i].token_q_head = 0;
752 purgeable_queues[i].token_q_tail = 0;
753 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
754 queue_init(&purgeable_queues[i].objq[group]);
755
756 purgeable_queues[i].type = i;
757 purgeable_queues[i].new_pages = 0;
758 #if MACH_ASSERT
759 purgeable_queues[i].debug_count_tokens = 0;
760 purgeable_queues[i].debug_count_objects = 0;
761 #endif
762 };
763 purgeable_nonvolatile_count = 0;
764 queue_init(&purgeable_nonvolatile_queue);
765
766 for (i = 0; i < MAX_COLORS; i++ )
767 vm_page_queue_init(&vm_page_queue_free[i].qhead);
768
769 vm_page_queue_init(&vm_lopage_queue_free);
770 vm_page_queue_init(&vm_page_queue_active);
771 vm_page_queue_init(&vm_page_queue_inactive);
772 #if CONFIG_SECLUDED_MEMORY
773 vm_page_queue_init(&vm_page_queue_secluded);
774 #endif /* CONFIG_SECLUDED_MEMORY */
775 vm_page_queue_init(&vm_page_queue_cleaned);
776 vm_page_queue_init(&vm_page_queue_throttled);
777 vm_page_queue_init(&vm_page_queue_anonymous);
778 queue_init(&vm_objects_wired);
779
780 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
781 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
782
783 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
784 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
785 }
786 #if CONFIG_BACKGROUND_QUEUE
787 vm_page_queue_init(&vm_page_queue_background);
788
789 vm_page_background_count = 0;
790 vm_page_background_internal_count = 0;
791 vm_page_background_external_count = 0;
792 vm_page_background_promoted_count = 0;
793
794 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
795
796 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
797 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
798
799 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
800 vm_page_background_exclude_external = 0;
801
802 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
803 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
804 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
805
806 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
807 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
808 #endif
809 vm_page_free_wanted = 0;
810 vm_page_free_wanted_privileged = 0;
811 #if CONFIG_SECLUDED_MEMORY
812 vm_page_free_wanted_secluded = 0;
813 #endif /* CONFIG_SECLUDED_MEMORY */
814
815 #if defined (__x86_64__)
816 /* this must be called before vm_page_set_colors() */
817 vm_page_setup_clump();
818 #endif
819
820 vm_page_set_colors();
821
822 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
823 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
824 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
825 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
826
827 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
828 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
829 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
830 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
831 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
832 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
834 #if CONFIG_SECLUDED_MEMORY
835 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
836 #endif /* CONFIG_SECLUDED_MEMORY */
837
838 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
839 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
840 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
841 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
842 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
844 #if CONFIG_SECLUDED_MEMORY
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
846 #endif /* CONFIG_SECLUDED_MEMORY */
847
848 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
849 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
850 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
851 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
852 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
853 #if CONFIG_SECLUDED_MEMORY
854 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
855 #endif /* CONFIG_SECLUDED_MEMORY */
856
857 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
858 {
859 vm_allocation_sites_static[i].refcount = 2;
860 vm_allocation_sites_static[i].tag = i;
861 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
862 }
863 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
864 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
865 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
866
867 /*
868 * Steal memory for the map and zone subsystems.
869 */
870 #if CONFIG_GZALLOC
871 gzalloc_configure();
872 #endif
873 kernel_debug_string_early("vm_map_steal_memory");
874 vm_map_steal_memory();
875
876 /*
877 * Allocate (and initialize) the virtual-to-physical
878 * table hash buckets.
879 *
880 * The number of buckets should be a power of two to
881 * get a good hash function. The following computation
882 * chooses the first power of two that is at least as
883 * large as the number of physical pages in the system.
884 */
885
886 if (vm_page_bucket_count == 0) {
887 unsigned int npages = pmap_free_pages();
888
889 vm_page_bucket_count = 1;
890 while (vm_page_bucket_count < npages)
891 vm_page_bucket_count <<= 1;
892 }
893 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
894
895 vm_page_hash_mask = vm_page_bucket_count - 1;
896
897 /*
898 * Calculate object shift value for hashing algorithm:
899 * O = log2(sizeof(struct vm_object))
900 * B = log2(vm_page_bucket_count)
901 * hash shifts the object left by
902 * B/2 - O + 1
903 */
904 size = vm_page_bucket_count;
905 for (log1 = 0; size > 1; log1++)
906 size /= 2;
907 size = sizeof(struct vm_object);
908 for (log2 = 0; size > 1; log2++)
909 size /= 2;
910 vm_page_hash_shift = log1/2 - log2 + 1;
911
912 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
913 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
914 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
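/*
 * Worked example (not part of the original source, using hypothetical
 * sizes): if vm_page_bucket_count were 2^20 and sizeof(struct vm_object)
 * were 256 bytes (2^8), then log1 = 20 and log2 = 8, so
 * vm_page_hash_shift = 20/2 - 8 + 1 = 3 and
 * vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421.
 */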
915
916 if (vm_page_hash_mask & vm_page_bucket_count)
917 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
918
919 #if VM_PAGE_BUCKETS_CHECK
920 #if VM_PAGE_FAKE_BUCKETS
921 /*
922 * Allocate a decoy set of page buckets, to detect
923 * any stomping there.
924 */
925 vm_page_fake_buckets = (vm_page_bucket_t *)
926 pmap_steal_memory(vm_page_bucket_count *
927 sizeof(vm_page_bucket_t));
928 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
929 vm_page_fake_buckets_end =
930 vm_map_round_page((vm_page_fake_buckets_start +
931 (vm_page_bucket_count *
932 sizeof (vm_page_bucket_t))),
933 PAGE_MASK);
934 char *cp;
935 for (cp = (char *)vm_page_fake_buckets_start;
936 cp < (char *)vm_page_fake_buckets_end;
937 cp++) {
938 *cp = 0x5a;
939 }
940 #endif /* VM_PAGE_FAKE_BUCKETS */
941 #endif /* VM_PAGE_BUCKETS_CHECK */
942
943 kernel_debug_string_early("vm_page_buckets");
944 vm_page_buckets = (vm_page_bucket_t *)
945 pmap_steal_memory(vm_page_bucket_count *
946 sizeof(vm_page_bucket_t));
947
948 kernel_debug_string_early("vm_page_bucket_locks");
949 vm_page_bucket_locks = (lck_spin_t *)
950 pmap_steal_memory(vm_page_bucket_lock_count *
951 sizeof(lck_spin_t));
952
953 for (i = 0; i < vm_page_bucket_count; i++) {
954 vm_page_bucket_t *bucket = &vm_page_buckets[i];
955
956 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
957 #if MACH_PAGE_HASH_STATS
958 bucket->cur_count = 0;
959 bucket->hi_count = 0;
960 #endif /* MACH_PAGE_HASH_STATS */
961 }
962
963 for (i = 0; i < vm_page_bucket_lock_count; i++)
964 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
965
966 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
967 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
968 vm_tag_init();
969
970 #if VM_PAGE_BUCKETS_CHECK
971 vm_page_buckets_check_ready = TRUE;
972 #endif /* VM_PAGE_BUCKETS_CHECK */
973
974 /*
975 * Machine-dependent code allocates the resident page table.
976 * It uses vm_page_init to initialize the page frames.
977 * The code also returns to us the virtual space available
978 * to the kernel. We don't trust the pmap module
979 * to get the alignment right.
980 */
981
982 kernel_debug_string_early("pmap_startup");
983 pmap_startup(&virtual_space_start, &virtual_space_end);
984 virtual_space_start = round_page(virtual_space_start);
985 virtual_space_end = trunc_page(virtual_space_end);
986
987 *startp = virtual_space_start;
988 *endp = virtual_space_end;
989
990 /*
991 * Compute the initial "wire" count.
992 * Up until now, the pages which have been set aside are not under
993 * the VM system's control, so although they aren't explicitly
994 * wired, they nonetheless can't be moved. At this moment,
995 * all VM managed pages are "free", courtesy of pmap_startup.
996 */
997 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
998 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
999 #if CONFIG_SECLUDED_MEMORY
1000 vm_page_wire_count -= vm_page_secluded_count;
1001 #endif
1002 vm_page_wire_count_initial = vm_page_wire_count;
1003 vm_page_pages_initial = vm_page_pages;
1004
1005 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1006 vm_page_free_count, vm_page_wire_count);
1007
1008 kernel_debug_string_early("vm_page_bootstrap complete");
1009 simple_lock_init(&vm_paging_lock, 0);
1010 }
1011
1012 #ifndef MACHINE_PAGES
1013 /*
1014 * We implement pmap_steal_memory and pmap_startup with the help
1015 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1016 */
1017
1018 void *
1019 pmap_steal_memory(
1020 vm_size_t size)
1021 {
1022 kern_return_t kr;
1023 vm_offset_t addr, vaddr;
1024 ppnum_t phys_page;
1025
1026 /*
1027 * We round the size up to a multiple of the pointer size.
1028 */
1029
1030 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1031
1032 /*
1033 * If this is the first call to pmap_steal_memory,
1034 * we have to initialize ourself.
1035 */
1036
1037 if (virtual_space_start == virtual_space_end) {
1038 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1039
1040 /*
1041 * The initial values must be aligned properly, and
1042 * we don't trust the pmap module to do it right.
1043 */
1044
1045 virtual_space_start = round_page(virtual_space_start);
1046 virtual_space_end = trunc_page(virtual_space_end);
1047 }
1048
1049 /*
1050 * Allocate virtual memory for this request.
1051 */
1052
1053 addr = virtual_space_start;
1054 virtual_space_start += size;
1055
1056 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1057
1058 /*
1059 * Allocate and map physical pages to back new virtual pages.
1060 */
1061
1062 for (vaddr = round_page(addr);
1063 vaddr < addr + size;
1064 vaddr += PAGE_SIZE) {
1065
1066 if (!pmap_next_page_hi(&phys_page))
1067 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1068
1069 /*
1070 * XXX Logically, these mappings should be wired,
1071 * but some pmap modules barf if they are.
1072 */
1073 #if defined(__LP64__)
1074 #ifdef __arm64__
1075 /* ARM64_TODO: verify that we really don't need this */
1076 #else
1077 pmap_pre_expand(kernel_pmap, vaddr);
1078 #endif
1079 #endif
1080
1081 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1082 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1083 VM_WIMG_USE_DEFAULT, FALSE);
1084
1085 if (kr != KERN_SUCCESS) {
1086 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1087 (unsigned long)vaddr, phys_page);
1088 }
1089
1090 /*
1091 * Account for newly stolen memory
1092 */
1093 vm_page_wire_count++;
1094 vm_page_stolen_count++;
1095 }
1096
1097 #if KASAN
1098 kasan_notify_address(round_page(addr), size);
1099 #endif
1100 return (void *) addr;
1101 }
1102
1103 #if CONFIG_SECLUDED_MEMORY
1104 /* boot-args to control secluded memory */
1105 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1106 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1107 int secluded_for_apps = 1; /* apps can use secluded memory */
1108 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1109 #if 11
1110 int secluded_for_fbdp = 0;
1111 #endif
1112 #endif /* CONFIG_SECLUDED_MEMORY */
1113
1114
1115 #if defined(__arm__) || defined(__arm64__)
1116 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1117 unsigned int vm_first_phys_ppnum = 0;
1118 #endif
1119
1120
1121 void vm_page_release_startup(vm_page_t mem);
1122 void
1123 pmap_startup(
1124 vm_offset_t *startp,
1125 vm_offset_t *endp)
1126 {
1127 unsigned int i, npages, pages_initialized, fill, fillval;
1128 ppnum_t phys_page;
1129 addr64_t tmpaddr;
1130
1131 #if defined(__LP64__)
1132 /*
1133 * make sure we are aligned on a 64 byte boundary
1134 * for VM_PAGE_PACK_PTR (it clips off the low-order
1135 * 6 bits of the pointer)
1136 */
1137 if (virtual_space_start != virtual_space_end)
1138 virtual_space_start = round_page(virtual_space_start);
1139 #endif
1140
1141 /*
1142 * We calculate how many page frames we will have
1143 * and then allocate the page structures in one chunk.
1144 */
1145
1146 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1147 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1148 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many page frames fit, counting a vm_page_t for each */
1149
1150 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1151
1152 /*
1153 * Initialize the page frames.
1154 */
1155 kernel_debug_string_early("Initialize the page frames");
1156
1157 vm_page_array_beginning_addr = &vm_pages[0];
1158 vm_page_array_ending_addr = &vm_pages[npages];
1159
1160 for (i = 0, pages_initialized = 0; i < npages; i++) {
1161 if (!pmap_next_page(&phys_page))
1162 break;
1163 #if defined(__arm__) || defined(__arm64__)
1164 if (pages_initialized == 0) {
1165 vm_first_phys_ppnum = phys_page;
1166 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1167 }
1168 assert((i + vm_first_phys_ppnum) == phys_page);
1169 #endif
1170 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1171 vm_page_lowest = phys_page;
1172
1173 vm_page_init(&vm_pages[i], phys_page, FALSE);
1174 vm_page_pages++;
1175 pages_initialized++;
1176 }
1177 vm_pages_count = pages_initialized;
1178 vm_page_array_boundary = &vm_pages[pages_initialized];
1179
1180 #if defined(__LP64__)
1181
1182 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1183 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1184
1185 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1186 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1187 #endif
1188 kernel_debug_string_early("page fill/release");
1189 /*
1190 * Check if we want to initialize pages to a known value
1191 */
1192 fill = 0; /* Assume no fill */
1193 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1194 #if DEBUG
1195 /* This slows down booting the DEBUG kernel, particularly on
1196 * large memory systems, but is worthwhile in deterministically
1197 * trapping uninitialized memory usage.
1198 */
1199 if (fill == 0) {
1200 fill = 1;
1201 fillval = 0xDEB8F177;
1202 }
1203 #endif
1204 if (fill)
1205 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1206
1207 #if CONFIG_SECLUDED_MEMORY
1208 /* default: no secluded mem */
1209 secluded_mem_mb = 0;
1210 if (max_mem > 1*1024*1024*1024) {
1211 /* default to 90MB for devices with > 1GB of RAM */
1212 secluded_mem_mb = 90;
1213 }
1214 /* override with value from device tree, if provided */
1215 PE_get_default("kern.secluded_mem_mb",
1216 &secluded_mem_mb, sizeof(secluded_mem_mb));
1217 /* override with value from boot-args, if provided */
1218 PE_parse_boot_argn("secluded_mem_mb",
1219 &secluded_mem_mb,
1220 sizeof (secluded_mem_mb));
1221
1222 vm_page_secluded_target = (unsigned int)
1223 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1224 PE_parse_boot_argn("secluded_for_iokit",
1225 &secluded_for_iokit,
1226 sizeof (secluded_for_iokit));
1227 PE_parse_boot_argn("secluded_for_apps",
1228 &secluded_for_apps,
1229 sizeof (secluded_for_apps));
1230 PE_parse_boot_argn("secluded_for_filecache",
1231 &secluded_for_filecache,
1232 sizeof (secluded_for_filecache));
1233 #if 11
1234 PE_parse_boot_argn("secluded_for_fbdp",
1235 &secluded_for_fbdp,
1236 sizeof (secluded_for_fbdp));
1237 #endif
1238 #endif /* CONFIG_SECLUDED_MEMORY */
1239
1240 // -debug code remove
1241 if (2 == vm_himemory_mode) {
1242 // free low -> high so high is preferred
1243 for (i = 1; i <= pages_initialized; i++) {
1244 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1245 vm_page_release_startup(&vm_pages[i - 1]);
1246 }
1247 }
1248 else
1249 // debug code remove-
1250
1251 /*
1252 * Release pages in reverse order so that physical pages
1253 * initially get allocated in ascending addresses. This keeps
1254 * the devices (which must address physical memory) happy if
1255 * they require several consecutive pages.
1256 */
1257 for (i = pages_initialized; i > 0; i--) {
1258 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1259 vm_page_release_startup(&vm_pages[i - 1]);
1260 }
1261
1262 VM_CHECK_MEMORYSTATUS;
1263
1264 #if 0
1265 {
1266 vm_page_t xx, xxo, xxl;
1267 int i, j, k, l;
1268
1269 j = 0; /* (BRINGUP) */
1270 xxl = 0;
1271
1272 for( i = 0; i < vm_colors; i++ ) {
1273 queue_iterate(&vm_page_queue_free[i].qhead,
1274 xx,
1275 vm_page_t,
1276 pageq) { /* BRINGUP */
1277 j++; /* (BRINGUP) */
1278 if(j > vm_page_free_count) { /* (BRINGUP) */
1279 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1280 }
1281
1282 l = vm_page_free_count - j; /* (BRINGUP) */
1283 k = 0; /* (BRINGUP) */
1284
1285 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1286
1287 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1288 k++;
1289 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1290 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1291 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1292 }
1293 }
1294
1295 xxl = xx;
1296 }
1297 }
1298
1299 if(j != vm_page_free_count) { /* (BRINGUP) */
1300 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1301 }
1302 }
1303 #endif
1304
1305
1306 /*
1307 * We have to re-align virtual_space_start,
1308 * because pmap_steal_memory has been using it.
1309 */
1310
1311 virtual_space_start = round_page(virtual_space_start);
1312
1313 *startp = virtual_space_start;
1314 *endp = virtual_space_end;
1315 }
1316 #endif /* MACHINE_PAGES */
1317
1318 /*
1319 * Routine: vm_page_module_init
1320 * Purpose:
1321 * Second initialization pass, to be done after
1322 * the basic VM system is ready.
1323 */
1324 void
1325 vm_page_module_init(void)
1326 {
1327 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1328 vm_size_t vm_page_with_ppnum_size;
1329
1330 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1331 0, PAGE_SIZE, "vm pages array");
1332
1333 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1334 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1335 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1336 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1337 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1338 /*
1339 * Adjust zone statistics to account for the real pages allocated
1340 * in vm_page_create(). [Q: is this really what we want?]
1341 */
1342 vm_page_array_zone->count += vm_page_pages;
1343 vm_page_array_zone->sum_count += vm_page_pages;
1344 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1345 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1346 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1347 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1348 /* since zone accounts for these, take them out of stolen */
1349 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1350
1351 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1352
1353 vm_page_zone = zinit(vm_page_with_ppnum_size,
1354 0, PAGE_SIZE, "vm pages");
1355
1356 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1357 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1358 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1359 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1360 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1361 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1362 }
1363
1364 /*
1365 * Routine: vm_page_create
1366 * Purpose:
1367 * After the VM system is up, machine-dependent code
1368 * may stumble across more physical memory. For example,
1369 * memory that it was reserving for a frame buffer.
1370 * vm_page_create turns this memory into available pages.
1371 */
1372
1373 void
1374 vm_page_create(
1375 ppnum_t start,
1376 ppnum_t end)
1377 {
1378 ppnum_t phys_page;
1379 vm_page_t m;
1380
1381 for (phys_page = start;
1382 phys_page < end;
1383 phys_page++) {
1384 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1385 == VM_PAGE_NULL)
1386 vm_page_more_fictitious();
1387
1388 m->fictitious = FALSE;
1389 pmap_clear_noencrypt(phys_page);
1390
1391 vm_page_pages++;
1392 vm_page_release(m, FALSE);
1393 }
1394 }
1395
1396 /*
1397 * vm_page_hash:
1398 *
1399 * Distributes the object/offset key pair among hash buckets.
1400 *
1401 * NOTE: The bucket count must be a power of 2
1402 */
1403 #define vm_page_hash(object, offset) (\
1404 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1405 & vm_page_hash_mask)
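/*
 * Illustrative sketch (not part of the original source): the hypothetical
 * helper below shows how the hash above pairs with vm_page_buckets[] and
 * vm_page_bucket_locks[] to find a page.  The real vm_page_lookup(),
 * later in this file, adds a "memq_hint" fast path on the object.
 */
#if 0 /* sketch only */
static vm_page_t
vm_page_bucket_lookup_sketch(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	int			hash_id = vm_page_hash(object, offset);
	vm_page_bucket_t	*bucket = &vm_page_buckets[hash_id];
	lck_spin_t		*bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
	vm_page_t		m;

	lck_spin_lock(bucket_lock);
	for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
	     m != VM_PAGE_NULL;
	     m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m))) {
		/* compare the packed object pointer and the offset */
		if (m->vm_page_object == VM_PAGE_PACK_OBJECT(object) &&
		    m->offset == offset)
			break;
	}
	lck_spin_unlock(bucket_lock);
	return m;
}
#endif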
1406
1407
1408 /*
1409 * vm_page_insert: [ internal use only ]
1410 *
1411 * Inserts the given mem entry into the object/object-page
1412 * table and object list.
1413 *
1414 * The object must be locked.
1415 */
1416 void
1417 vm_page_insert(
1418 vm_page_t mem,
1419 vm_object_t object,
1420 vm_object_offset_t offset)
1421 {
1422 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1423 }
1424
1425 void
1426 vm_page_insert_wired(
1427 vm_page_t mem,
1428 vm_object_t object,
1429 vm_object_offset_t offset,
1430 vm_tag_t tag)
1431 {
1432 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1433 }
1434
1435 void
1436 vm_page_insert_internal(
1437 vm_page_t mem,
1438 vm_object_t object,
1439 vm_object_offset_t offset,
1440 vm_tag_t tag,
1441 boolean_t queues_lock_held,
1442 boolean_t insert_in_hash,
1443 boolean_t batch_pmap_op,
1444 boolean_t batch_accounting,
1445 uint64_t *delayed_ledger_update)
1446 {
1447 vm_page_bucket_t *bucket;
1448 lck_spin_t *bucket_lock;
1449 int hash_id;
1450 task_t owner;
1451
1452 XPR(XPR_VM_PAGE,
1453 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1454 object, offset, mem, 0,0);
1455 #if 0
1456 /*
1457 * we may not hold the page queue lock
1458 * so this check isn't safe to make
1459 */
1460 VM_PAGE_CHECK(mem);
1461 #endif
1462
1463 assert(page_aligned(offset));
1464
1465 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1466
1467 /* the vm_submap_object is only a placeholder for submaps */
1468 assert(object != vm_submap_object);
1469
1470 vm_object_lock_assert_exclusive(object);
1471 LCK_MTX_ASSERT(&vm_page_queue_lock,
1472 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1473 : LCK_MTX_ASSERT_NOTOWNED);
1474
1475 if (queues_lock_held == FALSE)
1476 assert(!VM_PAGE_PAGEABLE(mem));
1477
1478 if (insert_in_hash == TRUE) {
1479 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1480 if (mem->tabled || mem->vm_page_object)
1481 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1482 "already in (obj=%p,off=0x%llx)",
1483 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1484 #endif
1485 if (object->internal && (offset >= object->vo_size)) {
1486 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1487 mem, object, offset, object->vo_size);
1488 }
1489
1490 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1491
1492 /*
1493 * Record the object/offset pair in this page
1494 */
1495
1496 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1497 mem->offset = offset;
1498
1499 #if CONFIG_SECLUDED_MEMORY
1500 if (object->eligible_for_secluded) {
1501 vm_page_secluded.eligible_for_secluded++;
1502 }
1503 #endif /* CONFIG_SECLUDED_MEMORY */
1504
1505 /*
1506 * Insert it into the object/offset hash table
1507 */
1508 hash_id = vm_page_hash(object, offset);
1509 bucket = &vm_page_buckets[hash_id];
1510 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1511
1512 lck_spin_lock(bucket_lock);
1513
1514 mem->next_m = bucket->page_list;
1515 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1516 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1517
1518 #if MACH_PAGE_HASH_STATS
1519 if (++bucket->cur_count > bucket->hi_count)
1520 bucket->hi_count = bucket->cur_count;
1521 #endif /* MACH_PAGE_HASH_STATS */
1522 mem->hashed = TRUE;
1523 lck_spin_unlock(bucket_lock);
1524 }
1525
1526 {
1527 unsigned int cache_attr;
1528
1529 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1530
1531 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1532 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1533 }
1534 }
1535 /*
1536 * Now link into the object's list of backed pages.
1537 */
1538 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
1539 object->memq_hint = mem;
1540 mem->tabled = TRUE;
1541
1542 /*
1543 * Show that the object has one more resident page.
1544 */
1545
1546 object->resident_page_count++;
1547 if (VM_PAGE_WIRED(mem)) {
1548 assert(mem->wire_count > 0);
1549 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1550 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1551 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1552 }
1553 assert(object->resident_page_count >= object->wired_page_count);
1554
1555 if (batch_accounting == FALSE) {
1556 if (object->internal) {
1557 OSAddAtomic(1, &vm_page_internal_count);
1558 } else {
1559 OSAddAtomic(1, &vm_page_external_count);
1560 }
1561 }
1562
1563 /*
1564 * It wouldn't make sense to insert a "reusable" page in
1565 * an object (the page would have been marked "reusable" only
1566 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1567 * in the object at that time).
1568 * But a page could be inserted in an "all_reusable" object, if
1569 * something faults it in (a vm_read() from another task or a
1570 * "use-after-free" issue in user space, for example). It can
1571 * also happen if we're relocating a page from that object to
1572 * a different physical page during a physically-contiguous
1573 * allocation.
1574 */
1575 assert(!mem->reusable);
1576 if (object->all_reusable) {
1577 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1578 }
1579
1580 if (object->purgable == VM_PURGABLE_DENY) {
1581 owner = TASK_NULL;
1582 } else {
1583 owner = object->vo_purgeable_owner;
1584 }
1585 if (owner &&
1586 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1587 VM_PAGE_WIRED(mem))) {
1588
1589 if (delayed_ledger_update)
1590 *delayed_ledger_update += PAGE_SIZE;
1591 else {
1592 /* more non-volatile bytes */
1593 ledger_credit(owner->ledger,
1594 task_ledgers.purgeable_nonvolatile,
1595 PAGE_SIZE);
1596 /* more footprint */
1597 ledger_credit(owner->ledger,
1598 task_ledgers.phys_footprint,
1599 PAGE_SIZE);
1600 }
1601
1602 } else if (owner &&
1603 (object->purgable == VM_PURGABLE_VOLATILE ||
1604 object->purgable == VM_PURGABLE_EMPTY)) {
1605 assert(! VM_PAGE_WIRED(mem));
1606 /* more volatile bytes */
1607 ledger_credit(owner->ledger,
1608 task_ledgers.purgeable_volatile,
1609 PAGE_SIZE);
1610 }
1611
1612 if (object->purgable == VM_PURGABLE_VOLATILE) {
1613 if (VM_PAGE_WIRED(mem)) {
1614 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1615 } else {
1616 OSAddAtomic(+1, &vm_page_purgeable_count);
1617 }
1618 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1619 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
1620 /*
1621 * This page belongs to a purged VM object but hasn't
1622 * been purged (because it was "busy").
1623 * It's in the "throttled" queue and hence not
1624 * visible to vm_pageout_scan(). Move it to a pageable
1625 * queue, so that it can eventually be reclaimed, instead
1626 * of lingering in the "empty" object.
1627 */
1628 if (queues_lock_held == FALSE)
1629 vm_page_lockspin_queues();
1630 vm_page_deactivate(mem);
1631 if (queues_lock_held == FALSE)
1632 vm_page_unlock_queues();
1633 }
1634
1635 #if VM_OBJECT_TRACKING_OP_MODIFIED
1636 if (vm_object_tracking_inited &&
1637 object->internal &&
1638 object->resident_page_count == 0 &&
1639 object->pager == NULL &&
1640 object->shadow != NULL &&
1641 object->shadow->copy == object) {
1642 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1643 int numsaved = 0;
1644
1645 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1646 btlog_add_entry(vm_object_tracking_btlog,
1647 object,
1648 VM_OBJECT_TRACKING_OP_MODIFIED,
1649 bt,
1650 numsaved);
1651 }
1652 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1653 }
1654
1655 /*
1656 * vm_page_replace:
1657 *
1658 * Exactly like vm_page_insert, except that we first
1659 * remove any existing page at the given offset in object.
1660 *
1661 * The object must be locked.
1662 */
1663 void
1664 vm_page_replace(
1665 vm_page_t mem,
1666 vm_object_t object,
1667 vm_object_offset_t offset)
1668 {
1669 vm_page_bucket_t *bucket;
1670 vm_page_t found_m = VM_PAGE_NULL;
1671 lck_spin_t *bucket_lock;
1672 int hash_id;
1673
1674 #if 0
1675 /*
1676 * we don't hold the page queue lock
1677 * so this check isn't safe to make
1678 */
1679 VM_PAGE_CHECK(mem);
1680 #endif
1681 vm_object_lock_assert_exclusive(object);
1682 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1683 if (mem->tabled || mem->vm_page_object)
1684 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1685 "already in (obj=%p,off=0x%llx)",
1686 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1687 #endif
1688 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1689
1690 assert(!VM_PAGE_PAGEABLE(mem));
1691
1692 /*
1693 * Record the object/offset pair in this page
1694 */
1695 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1696 mem->offset = offset;
1697
1698 /*
1699 * Insert it into the object/offset hash table,
1700 * replacing any page that might have been there.
1701 */
1702
1703 hash_id = vm_page_hash(object, offset);
1704 bucket = &vm_page_buckets[hash_id];
1705 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1706
1707 lck_spin_lock(bucket_lock);
1708
1709 if (bucket->page_list) {
1710 vm_page_packed_t *mp = &bucket->page_list;
1711 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1712
1713 do {
1714 /*
1715 * compare packed object pointers
1716 */
1717 if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
1718 /*
1719 * Remove old page from hash list
1720 */
1721 *mp = m->next_m;
1722 m->hashed = FALSE;
1723 m->next_m = VM_PAGE_PACK_PTR(NULL);
1724
1725 found_m = m;
1726 break;
1727 }
1728 mp = &m->next_m;
1729 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1730
1731 mem->next_m = bucket->page_list;
1732 } else {
1733 mem->next_m = VM_PAGE_PACK_PTR(NULL);
1734 }
1735 /*
1736 * insert new page at head of hash list
1737 */
1738 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1739 mem->hashed = TRUE;
1740
1741 lck_spin_unlock(bucket_lock);
1742
1743 if (found_m) {
1744 /*
1745 * there was already a page at the specified
1746 * offset for this object... remove it from
1747 * the object and free it back to the free list
1748 */
1749 vm_page_free_unlocked(found_m, FALSE);
1750 }
1751 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1752 }
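
/*
 * Illustrative sketch (editorial addition, not compiled): how a caller
 * might install a freshly grabbed page at a given offset and let
 * vm_page_replace() dispose of any page already resident there.  The
 * helper name is an assumption made for this example; the requirements
 * (exclusive object lock held, page queues lock not held, new page not
 * yet pageable) come from vm_page_replace() above.
 */
#if 0 /* example only */
static vm_page_t
example_install_fresh_page(
	vm_object_t		object,		/* locked exclusive by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	/* removes and frees any page previously resident at this offset */
	vm_page_replace(mem, object, offset);

	return mem;
}
#endif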
1753
1754 /*
1755 * vm_page_remove: [ internal use only ]
1756 *
1757 * Removes the given mem entry from the object/offset-page
1758 * table and the object page list.
1759 *
1760 * The object must be locked.
1761 */
1762
1763 void
1764 vm_page_remove(
1765 vm_page_t mem,
1766 boolean_t remove_from_hash)
1767 {
1768 vm_page_bucket_t *bucket;
1769 vm_page_t this;
1770 lck_spin_t *bucket_lock;
1771 int hash_id;
1772 task_t owner;
1773 vm_object_t m_object;
1774
1775 m_object = VM_PAGE_OBJECT(mem);
1776
1777 XPR(XPR_VM_PAGE,
1778 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1779 m_object, mem->offset,
1780 mem, 0,0);
1781
1782 vm_object_lock_assert_exclusive(m_object);
1783 assert(mem->tabled);
1784 assert(!mem->cleaning);
1785 assert(!mem->laundry);
1786
1787 if (VM_PAGE_PAGEABLE(mem)) {
1788 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1789 }
1790 #if 0
1791 /*
1792 * we don't hold the page queue lock
1793 * so this check isn't safe to make
1794 */
1795 VM_PAGE_CHECK(mem);
1796 #endif
1797 if (remove_from_hash == TRUE) {
1798 /*
1799 * Remove from the object_object/offset hash table
1800 */
1801 hash_id = vm_page_hash(m_object, mem->offset);
1802 bucket = &vm_page_buckets[hash_id];
1803 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1804
1805 lck_spin_lock(bucket_lock);
1806
1807 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1808 /* optimize for common case */
1809
1810 bucket->page_list = mem->next_m;
1811 } else {
1812 vm_page_packed_t *prev;
1813
1814 for (prev = &this->next_m;
1815 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1816 prev = &this->next_m)
1817 continue;
1818 *prev = this->next_m;
1819 }
1820 #if MACH_PAGE_HASH_STATS
1821 bucket->cur_count--;
1822 #endif /* MACH_PAGE_HASH_STATS */
1823 mem->hashed = FALSE;
1824 this->next_m = VM_PAGE_PACK_PTR(NULL);
1825 lck_spin_unlock(bucket_lock);
1826 }
1827 /*
1828 * Now remove from the object's list of backed pages.
1829 */
1830
1831 vm_page_remove_internal(mem);
1832
1833 /*
1834 * And show that the object has one fewer resident
1835 * page.
1836 */
1837
1838 assert(m_object->resident_page_count > 0);
1839 m_object->resident_page_count--;
1840
1841 if (m_object->internal) {
1842 #if DEBUG
1843 assert(vm_page_internal_count);
1844 #endif /* DEBUG */
1845
1846 OSAddAtomic(-1, &vm_page_internal_count);
1847 } else {
1848 assert(vm_page_external_count);
1849 OSAddAtomic(-1, &vm_page_external_count);
1850
1851 if (mem->xpmapped) {
1852 assert(vm_page_xpmapped_external_count);
1853 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1854 }
1855 }
1856 if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
1857 if (m_object->resident_page_count == 0)
1858 vm_object_cache_remove(m_object);
1859 }
1860
1861 if (VM_PAGE_WIRED(mem)) {
1862 assert(mem->wire_count > 0);
1863 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1864 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1865 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1866 }
1867 assert(m_object->resident_page_count >=
1868 m_object->wired_page_count);
1869 if (mem->reusable) {
1870 assert(m_object->reusable_page_count > 0);
1871 m_object->reusable_page_count--;
1872 assert(m_object->reusable_page_count <=
1873 m_object->resident_page_count);
1874 mem->reusable = FALSE;
1875 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1876 vm_page_stats_reusable.reused_remove++;
1877 } else if (m_object->all_reusable) {
1878 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1879 vm_page_stats_reusable.reused_remove++;
1880 }
1881
1882 if (m_object->purgable == VM_PURGABLE_DENY) {
1883 owner = TASK_NULL;
1884 } else {
1885 owner = m_object->vo_purgeable_owner;
1886 }
1887 if (owner &&
1888 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1889 VM_PAGE_WIRED(mem))) {
1890 /* less non-volatile bytes */
1891 ledger_debit(owner->ledger,
1892 task_ledgers.purgeable_nonvolatile,
1893 PAGE_SIZE);
1894 /* less footprint */
1895 ledger_debit(owner->ledger,
1896 task_ledgers.phys_footprint,
1897 PAGE_SIZE);
1898 } else if (owner &&
1899 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1900 m_object->purgable == VM_PURGABLE_EMPTY)) {
1901 assert(! VM_PAGE_WIRED(mem));
1902 /* less volatile bytes */
1903 ledger_debit(owner->ledger,
1904 task_ledgers.purgeable_volatile,
1905 PAGE_SIZE);
1906 }
1907 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1908 if (VM_PAGE_WIRED(mem)) {
1909 assert(vm_page_purgeable_wired_count > 0);
1910 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1911 } else {
1912 assert(vm_page_purgeable_count > 0);
1913 OSAddAtomic(-1, &vm_page_purgeable_count);
1914 }
1915 }
1916
1917 if (m_object->set_cache_attr == TRUE)
1918 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1919
1920 mem->tabled = FALSE;
1921 mem->vm_page_object = 0;
1922 mem->offset = (vm_object_offset_t) -1;
1923 }
1924
1925
1926 /*
1927 * vm_page_lookup:
1928 *
1929 * Returns the page associated with the object/offset
1930 * pair specified; if none is found, VM_PAGE_NULL is returned.
1931 *
1932 * The object must be locked. No side effects.
1933 */
1934
1935 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1936
1937 #if DEBUG_VM_PAGE_LOOKUP
1938
1939 struct {
1940 uint64_t vpl_total;
1941 uint64_t vpl_empty_obj;
1942 uint64_t vpl_bucket_NULL;
1943 uint64_t vpl_hit_hint;
1944 uint64_t vpl_hit_hint_next;
1945 uint64_t vpl_hit_hint_prev;
1946 uint64_t vpl_fast;
1947 uint64_t vpl_slow;
1948 uint64_t vpl_hit;
1949 uint64_t vpl_miss;
1950
1951 uint64_t vpl_fast_elapsed;
1952 uint64_t vpl_slow_elapsed;
1953 } vm_page_lookup_stats __attribute__((aligned(8)));
1954
1955 #endif
1956
1957 #define KDP_VM_PAGE_WALK_MAX 1000
1958
1959 vm_page_t
1960 kdp_vm_page_lookup(
1961 vm_object_t object,
1962 vm_object_offset_t offset)
1963 {
1964 vm_page_t cur_page;
1965 int num_traversed = 0;
1966
1967 if (not_in_kdp) {
1968 panic("kdp_vm_page_lookup done outside of kernel debugger");
1969 }
1970
1971 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1972 if (cur_page->offset == offset) {
1973 return cur_page;
1974 }
1975 num_traversed++;
1976
1977 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1978 return VM_PAGE_NULL;
1979 }
1980 }
1981
1982 return VM_PAGE_NULL;
1983 }
1984
1985 vm_page_t
1986 vm_page_lookup(
1987 vm_object_t object,
1988 vm_object_offset_t offset)
1989 {
1990 vm_page_t mem;
1991 vm_page_bucket_t *bucket;
1992 vm_page_queue_entry_t qe;
1993 lck_spin_t *bucket_lock = NULL;
1994 int hash_id;
1995 #if DEBUG_VM_PAGE_LOOKUP
1996 uint64_t start, elapsed;
1997
1998 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1999 #endif
2000 vm_object_lock_assert_held(object);
2001
2002 if (object->resident_page_count == 0) {
2003 #if DEBUG_VM_PAGE_LOOKUP
2004 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2005 #endif
2006 return (VM_PAGE_NULL);
2007 }
2008
2009 mem = object->memq_hint;
2010
2011 if (mem != VM_PAGE_NULL) {
2012 assert(VM_PAGE_OBJECT(mem) == object);
2013
2014 if (mem->offset == offset) {
2015 #if DEBUG_VM_PAGE_LOOKUP
2016 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2017 #endif
2018 return (mem);
2019 }
2020 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
2021
2022 if (! vm_page_queue_end(&object->memq, qe)) {
2023 vm_page_t next_page;
2024
2025 next_page = (vm_page_t)((uintptr_t)qe);
2026 assert(VM_PAGE_OBJECT(next_page) == object);
2027
2028 if (next_page->offset == offset) {
2029 object->memq_hint = next_page; /* new hint */
2030 #if DEBUG_VM_PAGE_LOOKUP
2031 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2032 #endif
2033 return (next_page);
2034 }
2035 }
2036 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
2037
2038 if (! vm_page_queue_end(&object->memq, qe)) {
2039 vm_page_t prev_page;
2040
2041 prev_page = (vm_page_t)((uintptr_t)qe);
2042 assert(VM_PAGE_OBJECT(prev_page) == object);
2043
2044 if (prev_page->offset == offset) {
2045 object->memq_hint = prev_page; /* new hint */
2046 #if DEBUG_VM_PAGE_LOOKUP
2047 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2048 #endif
2049 return (prev_page);
2050 }
2051 }
2052 }
2053 /*
2054 * Search the hash table for this object/offset pair
2055 */
2056 hash_id = vm_page_hash(object, offset);
2057 bucket = &vm_page_buckets[hash_id];
2058
2059 /*
2060 * since we hold the object lock, we are guaranteed that no
2061 * new pages can be inserted into this object... this in turn
2062 * guarantees that the page we're looking for can't exist
2063 * if the bucket it hashes to is currently NULL even when looked
2064 * at outside the scope of the hash bucket lock... this is a
2065 * really cheap optimization to avoid taking the lock
2066 */
2067 if (!bucket->page_list) {
2068 #if DEBUG_VM_PAGE_LOOKUP
2069 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2070 #endif
2071 return (VM_PAGE_NULL);
2072 }
2073
2074 #if DEBUG_VM_PAGE_LOOKUP
2075 start = mach_absolute_time();
2076 #endif
2077 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2078 /*
2079 * on average, it's roughly 3 times faster to walk a short memq list
2080 * than to take the spin lock and go through the hash list
2081 */
2082 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2083
2084 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2085
2086 if (mem->offset == offset)
2087 break;
2088
2089 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
2090 }
2091 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2092 mem = NULL;
2093 } else {
2094 vm_page_object_t packed_object;
2095
2096 packed_object = VM_PAGE_PACK_OBJECT(object);
2097
2098 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2099
2100 lck_spin_lock(bucket_lock);
2101
2102 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2103 mem != VM_PAGE_NULL;
2104 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
2105 #if 0
2106 /*
2107 * we don't hold the page queue lock
2108 * so this check isn't safe to make
2109 */
2110 VM_PAGE_CHECK(mem);
2111 #endif
2112 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
2113 break;
2114 }
2115 lck_spin_unlock(bucket_lock);
2116 }
2117
2118 #if DEBUG_VM_PAGE_LOOKUP
2119 elapsed = mach_absolute_time() - start;
2120
2121 if (bucket_lock) {
2122 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2123 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2124 } else {
2125 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2126 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2127 }
2128 if (mem != VM_PAGE_NULL)
2129 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2130 else
2131 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2132 #endif
2133 if (mem != VM_PAGE_NULL) {
2134 assert(VM_PAGE_OBJECT(mem) == object);
2135
2136 object->memq_hint = mem;
2137 }
2138 return (mem);
2139 }
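
/*
 * Illustrative sketch (editorial addition, not compiled): the expected
 * calling pattern for vm_page_lookup().  The caller must already hold
 * the object lock; a VM_PAGE_NULL return simply means no page is
 * resident at that offset.  The helper name is an assumption made for
 * this example.
 */
#if 0 /* example only */
static boolean_t
example_offset_is_resident(
	vm_object_t		object,		/* locked by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	m = vm_page_lookup(object, offset);

	return (m != VM_PAGE_NULL);
}
#endif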
2140
2141
2142 /*
2143 * vm_page_rename:
2144 *
2145 * Move the given memory entry from its
2146 * current object to the specified target object/offset.
2147 *
2148 * The object must be locked.
2149 */
2150 void
2151 vm_page_rename(
2152 vm_page_t mem,
2153 vm_object_t new_object,
2154 vm_object_offset_t new_offset)
2155 {
2156 boolean_t internal_to_external, external_to_internal;
2157 vm_tag_t tag;
2158 vm_object_t m_object;
2159
2160 m_object = VM_PAGE_OBJECT(mem);
2161
2162 assert(m_object != new_object);
2163 assert(m_object);
2164
2165 XPR(XPR_VM_PAGE,
2166 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2167 new_object, new_offset,
2168 mem, 0,0);
2169
2170 /*
2171 * Changes to mem->object require the page queues lock because
2172 * the pageout daemon uses that lock to get the object.
2173 */
2174 vm_page_lockspin_queues();
2175
2176 internal_to_external = FALSE;
2177 external_to_internal = FALSE;
2178
2179 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2180 /*
2181 * it's much easier to get the vm_page_pageable_xxx accounting correct
2182 * if we first move the page to the active queue... it's going to end
2183 * up there anyway, and we don't call vm_page_rename() frequently enough
2184 * for this to matter.
2185 */
2186 vm_page_queues_remove(mem, FALSE);
2187 vm_page_activate(mem);
2188 }
2189 if (VM_PAGE_PAGEABLE(mem)) {
2190 if (m_object->internal && !new_object->internal) {
2191 internal_to_external = TRUE;
2192 }
2193 if (!m_object->internal && new_object->internal) {
2194 external_to_internal = TRUE;
2195 }
2196 }
2197
2198 tag = m_object->wire_tag;
2199 vm_page_remove(mem, TRUE);
2200 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2201
2202 if (internal_to_external) {
2203 vm_page_pageable_internal_count--;
2204 vm_page_pageable_external_count++;
2205 } else if (external_to_internal) {
2206 vm_page_pageable_external_count--;
2207 vm_page_pageable_internal_count++;
2208 }
2209
2210 vm_page_unlock_queues();
2211 }
2212
2213 /*
2214 * vm_page_init:
2215 *
2216 * Initialize the fields in a new page.
2217 * This takes a structure with arbitrary (uninitialized) contents and initializes it
2218 * so that it can be given to vm_page_release or vm_page_insert.
2219 */
2220 void
2221 vm_page_init(
2222 vm_page_t mem,
2223 ppnum_t phys_page,
2224 boolean_t lopage)
2225 {
2226 assert(phys_page);
2227
2228 #if DEBUG
2229 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2230 if (!(pmap_valid_page(phys_page))) {
2231 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2232 }
2233 }
2234 #endif
2235 *mem = vm_page_template;
2236
2237 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2238 #if 0
2239 /*
2240 * we're leaving this turned off for now... currently pages
2241 * come off the free list and are either immediately dirtied/referenced
2242 * due to zero-fill or COW faults, or are used to read or write files...
2243 * in the file I/O case, the UPL mechanism takes care of clearing
2244 * the state of the HW ref/mod bits in a somewhat fragile way.
2245 * Since we may change the way this works in the future (to toughen it up),
2246 * I'm leaving this as a reminder of where these bits could get cleared
2247 */
2248
2249 /*
2250 * make sure both the h/w referenced and modified bits are
2251 * clear at this point... we are especially dependent on
2252 * not finding a 'stale' h/w modified in a number of spots
2253 * once this page goes back into use
2254 */
2255 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2256 #endif
2257 mem->lopage = lopage;
2258 }
2259
2260 /*
2261 * vm_page_grab_fictitious:
2262 *
2263 * Remove a fictitious page from the free list.
2264 * Returns VM_PAGE_NULL if there are no free pages.
2265 */
2266 int c_vm_page_grab_fictitious = 0;
2267 int c_vm_page_grab_fictitious_failed = 0;
2268 int c_vm_page_release_fictitious = 0;
2269 int c_vm_page_more_fictitious = 0;
2270
2271 vm_page_t
2272 vm_page_grab_fictitious_common(
2273 ppnum_t phys_addr)
2274 {
2275 vm_page_t m;
2276
2277 if ((m = (vm_page_t)zget(vm_page_zone))) {
2278
2279 vm_page_init(m, phys_addr, FALSE);
2280 m->fictitious = TRUE;
2281
2282 c_vm_page_grab_fictitious++;
2283 } else
2284 c_vm_page_grab_fictitious_failed++;
2285
2286 return m;
2287 }
2288
2289 vm_page_t
2290 vm_page_grab_fictitious(void)
2291 {
2292 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2293 }
2294
2295 int vm_guard_count;
2296
2297
2298 vm_page_t
2299 vm_page_grab_guard(void)
2300 {
2301 vm_page_t page;
2302 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2303 if (page) OSAddAtomic(1, &vm_guard_count);
2304 return page;
2305 }
2306
2307
2308 /*
2309 * vm_page_release_fictitious:
2310 *
2311 * Release a fictitious page to the zone pool
2312 */
2313 void
2314 vm_page_release_fictitious(
2315 vm_page_t m)
2316 {
2317 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
2318 assert(m->fictitious);
2319 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2320 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2321
2322
2323 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2324
2325 c_vm_page_release_fictitious++;
2326
2327 zfree(vm_page_zone, m);
2328 }
2329
2330 /*
2331 * vm_page_more_fictitious:
2332 *
2333 * Add more fictitious pages to the zone.
2334 * Allowed to block. This routine is tightly coupled
2335 * with the zones code, for several reasons:
2336 * 1. we need to carve some page structures out of physical
2337 * memory before zones work, so they _cannot_ come from
2338 * the zone_map.
2339 * 2. the zone needs to be collectable in order to prevent
2340 * growth without bound. These structures are used by
2341 * the device pager (by the hundreds and thousands), as
2342 * private pages for pageout, and as blocking pages for
2343 * pagein. Temporary bursts in demand should not result in
2344 * permanent allocation of a resource.
2345 * 3. To smooth allocation humps, we allocate single pages
2346 * with kernel_memory_allocate(), and cram them into the
2347 * zone.
2348 */
2349
2350 void vm_page_more_fictitious(void)
2351 {
2352 vm_offset_t addr;
2353 kern_return_t retval;
2354
2355 c_vm_page_more_fictitious++;
2356
2357 /*
2358 * Allocate a single page from the zone_map. Do not wait if no physical
2359 * pages are immediately available, and do not zero the space. We need
2360 * our own blocking lock here to prevent multiple simultaneous
2361 * requests from piling up on the zone_map lock, so that at most
2362 * one of our threads is ever waiting on the map lock.
2363 * If the winner is not vm-privileged, then the page allocation will fail,
2364 * and it will temporarily block here in the vm_page_wait().
2365 */
2366 lck_mtx_lock(&vm_page_alloc_lock);
2367 /*
2368 * If another thread allocated space, just bail out now.
2369 */
2370 if (zone_free_count(vm_page_zone) > 5) {
2371 /*
2372 * The number "5" is a small number that is larger than the
2373 * number of fictitious pages that any single caller will
2374 * attempt to allocate. Otherwise, a thread will attempt to
2375 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2376 * release all of the resources and locks already acquired,
2377 * and then call this routine. This routine finds the pages
2378 * that the caller released, so fails to allocate new space.
2379 * The process repeats infinitely. The largest known number
2380 * of fictitious pages required in this manner is 2. 5 is
2381 * simply a somewhat larger number.
2382 */
2383 lck_mtx_unlock(&vm_page_alloc_lock);
2384 return;
2385 }
2386
2387 retval = kernel_memory_allocate(zone_map,
2388 &addr, PAGE_SIZE, 0,
2389 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2390 if (retval != KERN_SUCCESS) {
2391 /*
2392 * No page was available. Drop the
2393 * lock to give another thread a chance at it, and
2394 * wait for the pageout daemon to make progress.
2395 */
2396 lck_mtx_unlock(&vm_page_alloc_lock);
2397 vm_page_wait(THREAD_UNINT);
2398 return;
2399 }
2400
2401 zcram(vm_page_zone, addr, PAGE_SIZE);
2402
2403 lck_mtx_unlock(&vm_page_alloc_lock);
2404 }
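
/*
 * Illustrative sketch (editorial addition, not compiled): the caller-side
 * retry loop described in the comments above.  A caller that fails to get
 * a fictitious page is expected to drop its own resources, replenish the
 * zone with vm_page_more_fictitious() (which may block), and try again.
 * The helper name is an assumption made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_fictitious_retry(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
		/* zone is empty; refill it (may block) and retry */
		vm_page_more_fictitious();
	}
	return m;
}
#endif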
2405
2406
2407 /*
2408 * vm_pool_low():
2409 *
2410 * Return true if it is not likely that a non-vm_privileged thread
2411 * can get memory without blocking. Advisory only, since the
2412 * situation may change under us.
2413 */
2414 int
2415 vm_pool_low(void)
2416 {
2417 /* No locking, at worst we will fib. */
2418 return( vm_page_free_count <= vm_page_free_reserved );
2419 }
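
/*
 * Illustrative sketch (editorial addition, not compiled): vm_pool_low()
 * is advisory only, so it is typically used to choose a non-blocking
 * fallback rather than as a guarantee that an allocation will succeed.
 * The helper name is an assumption made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_unless_low(void)
{
	if (vm_pool_low()) {
		/* likely to block; let the caller fall back or defer */
		return VM_PAGE_NULL;
	}
	return vm_page_grab();
}
#endif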
2420
2421
2422 #if CONFIG_BACKGROUND_QUEUE
2423
2424 void
2425 vm_page_update_background_state(vm_page_t mem)
2426 {
2427 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2428 return;
2429
2430 if (mem->vm_page_in_background == FALSE)
2431 return;
2432
2433 #if BACKGROUNDQ_BASED_ON_QOS
2434 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2435 return;
2436 #else
2437 task_t my_task;
2438
2439 my_task = current_task();
2440
2441 if (my_task) {
2442 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2443 return;
2444 }
2445 #endif
2446 vm_page_lockspin_queues();
2447
2448 mem->vm_page_in_background = FALSE;
2449 vm_page_background_promoted_count++;
2450
2451 vm_page_remove_from_backgroundq(mem);
2452
2453 vm_page_unlock_queues();
2454 }
2455
2456
2457 void
2458 vm_page_assign_background_state(vm_page_t mem)
2459 {
2460 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2461 return;
2462
2463 #if BACKGROUNDQ_BASED_ON_QOS
2464 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2465 mem->vm_page_in_background = TRUE;
2466 else
2467 mem->vm_page_in_background = FALSE;
2468 #else
2469 task_t my_task;
2470
2471 my_task = current_task();
2472
2473 if (my_task)
2474 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2475 #endif
2476 }
2477
2478
2479 void
2480 vm_page_remove_from_backgroundq(
2481 vm_page_t mem)
2482 {
2483 vm_object_t m_object;
2484
2485 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2486
2487 if (mem->vm_page_on_backgroundq) {
2488 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2489
2490 mem->vm_page_backgroundq.next = 0;
2491 mem->vm_page_backgroundq.prev = 0;
2492 mem->vm_page_on_backgroundq = FALSE;
2493
2494 vm_page_background_count--;
2495
2496 m_object = VM_PAGE_OBJECT(mem);
2497
2498 if (m_object->internal)
2499 vm_page_background_internal_count--;
2500 else
2501 vm_page_background_external_count--;
2502 } else {
2503 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2504 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2505 }
2506 }
2507
2508
2509 void
2510 vm_page_add_to_backgroundq(
2511 vm_page_t mem,
2512 boolean_t first)
2513 {
2514 vm_object_t m_object;
2515
2516 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2517
2518 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2519 return;
2520
2521 if (mem->vm_page_on_backgroundq == FALSE) {
2522
2523 m_object = VM_PAGE_OBJECT(mem);
2524
2525 if (vm_page_background_exclude_external && !m_object->internal)
2526 return;
2527
2528 if (first == TRUE)
2529 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2530 else
2531 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2532 mem->vm_page_on_backgroundq = TRUE;
2533
2534 vm_page_background_count++;
2535
2536 if (m_object->internal)
2537 vm_page_background_internal_count++;
2538 else
2539 vm_page_background_external_count++;
2540 }
2541 }
2542
2543 #endif
2544
2545 /*
2546 * this is an interface to support bring-up of drivers
2547 * on platforms with physical memory > 4G...
2548 */
2549 int vm_himemory_mode = 2;
2550
2551
2552 /*
2553 * this interface exists to support hardware controllers
2554 * incapable of generating DMAs with more than 32 bits
2555 * of address on platforms with physical memory > 4G...
2556 */
2557 unsigned int vm_lopages_allocated_q = 0;
2558 unsigned int vm_lopages_allocated_cpm_success = 0;
2559 unsigned int vm_lopages_allocated_cpm_failed = 0;
2560 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2561
2562 vm_page_t
2563 vm_page_grablo(void)
2564 {
2565 vm_page_t mem;
2566
2567 if (vm_lopage_needed == FALSE)
2568 return (vm_page_grab());
2569
2570 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2571
2572 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2573 vm_page_queue_remove_first(&vm_lopage_queue_free,
2574 mem,
2575 vm_page_t,
2576 pageq);
2577 assert(vm_lopage_free_count);
2578 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2579 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2580
2581 vm_lopage_free_count--;
2582 vm_lopages_allocated_q++;
2583
2584 if (vm_lopage_free_count < vm_lopage_lowater)
2585 vm_lopage_refill = TRUE;
2586
2587 lck_mtx_unlock(&vm_page_queue_free_lock);
2588
2589 #if CONFIG_BACKGROUND_QUEUE
2590 vm_page_assign_background_state(mem);
2591 #endif
2592 } else {
2593 lck_mtx_unlock(&vm_page_queue_free_lock);
2594
2595 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2596
2597 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2598 vm_lopages_allocated_cpm_failed++;
2599 lck_mtx_unlock(&vm_page_queue_free_lock);
2600
2601 return (VM_PAGE_NULL);
2602 }
2603 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2604
2605 mem->busy = TRUE;
2606
2607 vm_page_lockspin_queues();
2608
2609 mem->gobbled = FALSE;
2610 vm_page_gobble_count--;
2611 vm_page_wire_count--;
2612
2613 vm_lopages_allocated_cpm_success++;
2614 vm_page_unlock_queues();
2615 }
2616 assert(mem->busy);
2617 assert(!mem->pmapped);
2618 assert(!mem->wpmapped);
2619 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2620
2621 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2622
2623 return (mem);
2624 }
2625
2626
2627 /*
2628 * vm_page_grab:
2629 *
2630 * first try to grab a page from the per-cpu free list...
2631 * this must be done while pre-emption is disabled... if
2632 * a page is available, we're done...
2633 * if no page is available, grab the vm_page_queue_free_lock
2634 * and see if the current number of free pages would allow us
2635 * to grab at least one... if not, return VM_PAGE_NULL as before...
2636 * if there are pages available, disable preemption and
2637 * recheck the state of the per-cpu free list... we could
2638 * have been preempted and moved to a different cpu, or
2639 * some other thread could have re-filled it... if still
2640 * empty, figure out how many pages we can steal from the
2641 * global free queue and move to the per-cpu queue...
2642 * return one of these pages when done... only wake up the
2643 * pageout_scan thread if we moved pages from the global
2644 * list... no need for the wakeup if we've satisfied the
2645 * request from the per-cpu queue.
2646 */
2647
2648 #if CONFIG_SECLUDED_MEMORY
2649 vm_page_t vm_page_grab_secluded(void);
2650 #endif /* CONFIG_SECLUDED_MEMORY */
2651
2652 vm_page_t
2653 vm_page_grab(void)
2654 {
2655 return vm_page_grab_options(0);
2656 }
2657
2658 #if HIBERNATION
2659 boolean_t hibernate_rebuild_needed = FALSE;
2660 #endif /* HIBERNATION */
2661
2662 vm_page_t
2663 vm_page_grab_options(
2664 int grab_options)
2665 {
2666 vm_page_t mem;
2667
2668 disable_preemption();
2669
2670 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2671 return_page_from_cpu_list:
2672 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2673
2674 #if HIBERNATION
2675 if (hibernate_rebuild_needed) {
2676 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2677 }
2678 #endif /* HIBERNATION */
2679 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2680 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2681
2682 enable_preemption();
2683 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2684 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2685
2686 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2687 assert(mem->tabled == FALSE);
2688 assert(mem->vm_page_object == 0);
2689 assert(!mem->laundry);
2690 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2691 assert(mem->busy);
2692 assert(!mem->pmapped);
2693 assert(!mem->wpmapped);
2694 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2695
2696 #if CONFIG_BACKGROUND_QUEUE
2697 vm_page_assign_background_state(mem);
2698 #endif
2699 return mem;
2700 }
2701 enable_preemption();
2702
2703
2704 /*
2705 * Optionally produce warnings if the wire or gobble
2706 * counts exceed some threshold.
2707 */
2708 #if VM_PAGE_WIRE_COUNT_WARNING
2709 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2710 printf("mk: vm_page_grab(): high wired page count of %d\n",
2711 vm_page_wire_count);
2712 }
2713 #endif
2714 #if VM_PAGE_GOBBLE_COUNT_WARNING
2715 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2716 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2717 vm_page_gobble_count);
2718 }
2719 #endif
2720
2721 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2722
2723 /*
2724 * Only let privileged threads (involved in pageout)
2725 * dip into the reserved pool.
2726 */
2727 if ((vm_page_free_count < vm_page_free_reserved) &&
2728 !(current_thread()->options & TH_OPT_VMPRIV)) {
2729 /* no page for us in the free queue... */
2730 lck_mtx_unlock(&vm_page_queue_free_lock);
2731 mem = VM_PAGE_NULL;
2732
2733 #if CONFIG_SECLUDED_MEMORY
2734 /* ... but can we try and grab from the secluded queue? */
2735 if (vm_page_secluded_count > 0 &&
2736 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2737 task_can_use_secluded_mem(current_task()))) {
2738 mem = vm_page_grab_secluded();
2739 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2740 vm_page_secluded.grab_for_iokit++;
2741 if (mem) {
2742 vm_page_secluded.grab_for_iokit_success++;
2743 }
2744 }
2745 if (mem) {
2746 VM_CHECK_MEMORYSTATUS;
2747 return mem;
2748 }
2749 }
2750 #else /* CONFIG_SECLUDED_MEMORY */
2751 (void) grab_options;
2752 #endif /* CONFIG_SECLUDED_MEMORY */
2753 }
2754 else {
2755 vm_page_t head;
2756 vm_page_t tail;
2757 unsigned int pages_to_steal;
2758 unsigned int color;
2759 unsigned int clump_end, sub_count;
2760
2761 while ( vm_page_free_count == 0 ) {
2762
2763 lck_mtx_unlock(&vm_page_queue_free_lock);
2764 /*
2765 * must be a privileged thread to be
2766 * in this state since a non-privileged
2767 * thread would have bailed if we were
2768 * under the vm_page_free_reserved mark
2769 */
2770 VM_PAGE_WAIT();
2771 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2772 }
2773
2774 disable_preemption();
2775
2776 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2777 lck_mtx_unlock(&vm_page_queue_free_lock);
2778
2779 /*
2780 * we got preempted and moved to another processor
2781 * or we got preempted and someone else ran and filled the cache
2782 */
2783 goto return_page_from_cpu_list;
2784 }
2785 if (vm_page_free_count <= vm_page_free_reserved)
2786 pages_to_steal = 1;
2787 else {
2788 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2789 pages_to_steal = vm_free_magazine_refill_limit;
2790 else
2791 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2792 }
2793 color = PROCESSOR_DATA(current_processor(), start_color);
2794 head = tail = NULL;
2795
2796 vm_page_free_count -= pages_to_steal;
2797 clump_end = sub_count = 0;
2798
2799 while (pages_to_steal--) {
2800
2801 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2802 color = (color + 1) & vm_color_mask;
2803 #if defined(__x86_64__)
2804 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2805 mem,
2806 vm_page_t,
2807 pageq,
2808 clump_end);
2809 #else
2810 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2811 mem,
2812 vm_page_t,
2813 pageq);
2814 #endif
2815
2816 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
2817
2818 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2819
2820 #if defined(__arm__) || defined(__arm64__)
2821 color = (color + 1) & vm_color_mask;
2822 #else
2823
2824 #if DEVELOPMENT || DEBUG
2825
2826 sub_count++;
2827 if (clump_end) {
2828 vm_clump_update_stats(sub_count);
2829 sub_count = 0;
2830 color = (color + 1) & vm_color_mask;
2831 }
2832 #else
2833 if (clump_end) color = (color + 1) & vm_color_mask;
2834
2835 #endif /* if DEVELOPMENT || DEBUG */
2836
2837 #endif /* if defined(__arm__) || defined(__arm64__) */
2838
2839 if (head == NULL)
2840 head = mem;
2841 else
2842 tail->snext = mem;
2843 tail = mem;
2844
2845 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2846 assert(mem->tabled == FALSE);
2847 assert(mem->vm_page_object == 0);
2848 assert(!mem->laundry);
2849
2850 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2851
2852 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2853 assert(mem->busy);
2854 assert(!mem->pmapped);
2855 assert(!mem->wpmapped);
2856 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2857 }
2858 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2859 vm_clump_update_stats(sub_count);
2860 #endif
2861 lck_mtx_unlock(&vm_page_queue_free_lock);
2862
2863 #if HIBERNATION
2864 if (hibernate_rebuild_needed) {
2865 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2866 }
2867 #endif /* HIBERNATION */
2868 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2869 PROCESSOR_DATA(current_processor(), start_color) = color;
2870
2871 /*
2872 * satisfy this request
2873 */
2874 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2875 mem = head;
2876 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2877
2878 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2879 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2880
2881 enable_preemption();
2882 }
2883 /*
2884 * Decide if we should poke the pageout daemon.
2885 * We do this if the free count is less than the low
2886 * water mark, or if the free count is less than the high
2887 * water mark (but above the low water mark) and the inactive
2888 * count is less than its target.
2889 *
2890 * We don't have the counts locked ... if they change a little,
2891 * it doesn't really matter.
2892 */
2893 if ((vm_page_free_count < vm_page_free_min) ||
2894 ((vm_page_free_count < vm_page_free_target) &&
2895 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2896 thread_wakeup((event_t) &vm_page_free_wanted);
2897
2898 VM_CHECK_MEMORYSTATUS;
2899
2900 if (mem) {
2901 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2902
2903 #if CONFIG_BACKGROUND_QUEUE
2904 vm_page_assign_background_state(mem);
2905 #endif
2906 }
2907 return mem;
2908 }
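
/*
 * Illustrative sketch (editorial addition, not compiled): the canonical
 * grab-with-wait loop.  A non-privileged thread can see VM_PAGE_NULL when
 * the free pool is at or below the reserve; vm_page_wait() blocks until
 * pages may be available again, returning FALSE only if the wait was
 * interrupted.  The helper name and the choice of THREAD_UNINT are
 * assumptions made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_page_can_block(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_UNINT)) {
			/* defensive: with THREAD_UNINT this should not happen */
			return VM_PAGE_NULL;
		}
	}
	return m;
}
#endif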
2909
2910 #if CONFIG_SECLUDED_MEMORY
2911 vm_page_t
2912 vm_page_grab_secluded(void)
2913 {
2914 vm_page_t mem;
2915 vm_object_t object;
2916 int refmod_state;
2917
2918 if (vm_page_secluded_count == 0) {
2919 /* no secluded pages to grab... */
2920 return VM_PAGE_NULL;
2921 }
2922
2923 /* secluded queue is protected by the VM page queue lock */
2924 vm_page_lock_queues();
2925
2926 if (vm_page_secluded_count == 0) {
2927 /* no secluded pages to grab... */
2928 vm_page_unlock_queues();
2929 return VM_PAGE_NULL;
2930 }
2931
2932 #if 00
2933 /* can we grab from the secluded queue? */
2934 if (vm_page_secluded_count > vm_page_secluded_target ||
2935 (vm_page_secluded_count > 0 &&
2936 task_can_use_secluded_mem(current_task()))) {
2937 /* OK */
2938 } else {
2939 /* can't grab from secluded queue... */
2940 vm_page_unlock_queues();
2941 return VM_PAGE_NULL;
2942 }
2943 #endif
2944
2945 /* we can grab a page from secluded queue! */
2946 assert((vm_page_secluded_count_free +
2947 vm_page_secluded_count_inuse) ==
2948 vm_page_secluded_count);
2949 if (current_task()->task_can_use_secluded_mem) {
2950 assert(num_tasks_can_use_secluded_mem > 0);
2951 }
2952 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2953 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2954 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
2955 assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
2956 vm_page_queues_remove(mem, TRUE);
2957
2958 object = VM_PAGE_OBJECT(mem);
2959
2960 assert(!mem->fictitious);
2961 assert(!VM_PAGE_WIRED(mem));
2962 if (object == VM_OBJECT_NULL) {
2963 /* free for grab! */
2964 vm_page_unlock_queues();
2965 vm_page_secluded.grab_success_free++;
2966
2967 assert(mem->busy);
2968 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2969 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
2970 assert(mem->pageq.next == 0);
2971 assert(mem->pageq.prev == 0);
2972 assert(mem->listq.next == 0);
2973 assert(mem->listq.prev == 0);
2974 #if CONFIG_BACKGROUND_QUEUE
2975 assert(mem->vm_page_on_backgroundq == 0);
2976 assert(mem->vm_page_backgroundq.next == 0);
2977 assert(mem->vm_page_backgroundq.prev == 0);
2978 #endif /* CONFIG_BACKGROUND_QUEUE */
2979 return mem;
2980 }
2981
2982 assert(!object->internal);
2983 // vm_page_pageable_external_count--;
2984
2985 if (!vm_object_lock_try(object)) {
2986 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
2987 vm_page_secluded.grab_failure_locked++;
2988 reactivate_secluded_page:
2989 vm_page_activate(mem);
2990 vm_page_unlock_queues();
2991 return VM_PAGE_NULL;
2992 }
2993 if (mem->busy ||
2994 mem->cleaning ||
2995 mem->laundry) {
2996 /* can't steal page in this state... */
2997 vm_object_unlock(object);
2998 vm_page_secluded.grab_failure_state++;
2999 goto reactivate_secluded_page;
3000 }
3001
3002 mem->busy = TRUE;
3003 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3004 if (refmod_state & VM_MEM_REFERENCED) {
3005 mem->reference = TRUE;
3006 }
3007 if (refmod_state & VM_MEM_MODIFIED) {
3008 SET_PAGE_DIRTY(mem, FALSE);
3009 }
3010 if (mem->dirty || mem->precious) {
3011 /* can't grab a dirty page; re-activate */
3012 // printf("SECLUDED: dirty page %p\n", mem);
3013 PAGE_WAKEUP_DONE(mem);
3014 vm_page_secluded.grab_failure_dirty++;
3015 vm_object_unlock(object);
3016 goto reactivate_secluded_page;
3017 }
3018 if (mem->reference) {
3019 /* it's been used but we do need to grab a page... */
3020 }
3021
3022 vm_page_unlock_queues();
3023
3024 /* finish what vm_page_free() would have done... */
3025 vm_page_free_prepare_object(mem, TRUE);
3026 vm_object_unlock(object);
3027 object = VM_OBJECT_NULL;
3028 if (vm_page_free_verify) {
3029 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3030 }
3031 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3032 vm_page_secluded.grab_success_other++;
3033
3034 assert(mem->busy);
3035 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3036 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3037 assert(mem->pageq.next == 0);
3038 assert(mem->pageq.prev == 0);
3039 assert(mem->listq.next == 0);
3040 assert(mem->listq.prev == 0);
3041 #if CONFIG_BACKGROUND_QUEUE
3042 assert(mem->vm_page_on_backgroundq == 0);
3043 assert(mem->vm_page_backgroundq.next == 0);
3044 assert(mem->vm_page_backgroundq.prev == 0);
3045 #endif /* CONFIG_BACKGROUND_QUEUE */
3046
3047 return mem;
3048 }
3049 #endif /* CONFIG_SECLUDED_MEMORY */
3050
3051 /*
3052 * vm_page_release:
3053 *
3054 * Return a page to the free list.
3055 */
3056
3057 void
3058 vm_page_release(
3059 vm_page_t mem,
3060 boolean_t page_queues_locked)
3061 {
3062 unsigned int color;
3063 int need_wakeup = 0;
3064 int need_priv_wakeup = 0;
3065 #if CONFIG_SECLUDED_MEMORY
3066 int need_secluded_wakeup = 0;
3067 #endif /* CONFIG_SECLUDED_MEMORY */
3068
3069 if (page_queues_locked) {
3070 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3071 } else {
3072 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3073 }
3074
3075 assert(!mem->private && !mem->fictitious);
3076 if (vm_page_free_verify) {
3077 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3078 }
3079 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3080
3081 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3082
3083 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3084
3085 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3086 assert(mem->busy);
3087 assert(!mem->laundry);
3088 assert(mem->vm_page_object == 0);
3089 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3090 assert(mem->listq.next == 0 && mem->listq.prev == 0);
3091 #if CONFIG_BACKGROUND_QUEUE
3092 assert(mem->vm_page_backgroundq.next == 0 &&
3093 mem->vm_page_backgroundq.prev == 0 &&
3094 mem->vm_page_on_backgroundq == FALSE);
3095 #endif
3096 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3097 vm_lopage_free_count < vm_lopage_free_limit &&
3098 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3099 /*
3100 * this exists to support hardware controllers
3101 * incapable of generating DMAs with more than 32 bits
3102 * of address on platforms with physical memory > 4G...
3103 */
3104 vm_page_queue_enter_first(&vm_lopage_queue_free,
3105 mem,
3106 vm_page_t,
3107 pageq);
3108 vm_lopage_free_count++;
3109
3110 if (vm_lopage_free_count >= vm_lopage_free_limit)
3111 vm_lopage_refill = FALSE;
3112
3113 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3114 mem->lopage = TRUE;
3115 #if CONFIG_SECLUDED_MEMORY
3116 } else if (vm_page_free_count > vm_page_free_reserved &&
3117 vm_page_secluded_count < vm_page_secluded_target &&
3118 num_tasks_can_use_secluded_mem == 0) {
3119 /*
3120 * XXX FBDP TODO: also avoid refilling secluded queue
3121 * when some IOKit objects are already grabbing from it...
3122 */
3123 if (!page_queues_locked) {
3124 if (!vm_page_trylock_queues()) {
3125 /* take locks in right order */
3126 lck_mtx_unlock(&vm_page_queue_free_lock);
3127 vm_page_lock_queues();
3128 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3129 }
3130 }
3131 mem->lopage = FALSE;
3132 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3133 vm_page_queue_enter_first(&vm_page_queue_secluded,
3134 mem,
3135 vm_page_t,
3136 pageq);
3137 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3138 vm_page_secluded_count++;
3139 vm_page_secluded_count_free++;
3140 if (!page_queues_locked) {
3141 vm_page_unlock_queues();
3142 }
3143 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3144 if (vm_page_free_wanted_secluded > 0) {
3145 vm_page_free_wanted_secluded--;
3146 need_secluded_wakeup = 1;
3147 }
3148 #endif /* CONFIG_SECLUDED_MEMORY */
3149 } else {
3150 mem->lopage = FALSE;
3151 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3152
3153 color = VM_PAGE_GET_COLOR(mem);
3154 #if defined(__x86_64__)
3155 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3156 mem,
3157 vm_page_t,
3158 pageq);
3159 #else
3160 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3161 mem,
3162 vm_page_t,
3163 pageq);
3164 #endif
3165 vm_page_free_count++;
3166 /*
3167 * Check if we should wake up someone waiting for a page.
3168 * But don't bother waking them unless they can allocate.
3169 *
3170 * We wakeup only one thread, to prevent starvation.
3171 * Because the scheduling system handles wait queues FIFO,
3172 * if we wake up all waiting threads, one greedy thread
3173 * can starve multiple well-behaved threads. When the threads
3174 * all wake up, the greedy thread runs first, grabs the page,
3175 * and waits for another page. It will be the first to run
3176 * when the next page is freed.
3177 *
3178 * However, there is a slight danger here.
3179 * The thread we wake might not use the free page.
3180 * Then the other threads could wait indefinitely
3181 * while the page goes unused. To forestall this,
3182 * the pageout daemon will keep making free pages
3183 * as long as vm_page_free_wanted is non-zero.
3184 */
3185
3186 assert(vm_page_free_count > 0);
3187 if (vm_page_free_wanted_privileged > 0) {
3188 vm_page_free_wanted_privileged--;
3189 need_priv_wakeup = 1;
3190 #if CONFIG_SECLUDED_MEMORY
3191 } else if (vm_page_free_wanted_secluded > 0 &&
3192 vm_page_free_count > vm_page_free_reserved) {
3193 vm_page_free_wanted_secluded--;
3194 need_secluded_wakeup = 1;
3195 #endif /* CONFIG_SECLUDED_MEMORY */
3196 } else if (vm_page_free_wanted > 0 &&
3197 vm_page_free_count > vm_page_free_reserved) {
3198 vm_page_free_wanted--;
3199 need_wakeup = 1;
3200 }
3201 }
3202 lck_mtx_unlock(&vm_page_queue_free_lock);
3203
3204 if (need_priv_wakeup)
3205 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3206 #if CONFIG_SECLUDED_MEMORY
3207 else if (need_secluded_wakeup)
3208 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3209 #endif /* CONFIG_SECLUDED_MEMORY */
3210 else if (need_wakeup)
3211 thread_wakeup_one((event_t) &vm_page_free_count);
3212
3213 VM_CHECK_MEMORYSTATUS;
3214 }
3215
3216 /*
3217 * This version of vm_page_release() is used only at startup
3218 * when we are single-threaded and pages are being released
3219 * for the first time. Hence, no locking is needed and unnecessary checks are skipped.
3220 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3221 */
3222 void
3223 vm_page_release_startup(
3224 vm_page_t mem)
3225 {
3226 vm_page_queue_t queue_free;
3227
3228 if (vm_lopage_free_count < vm_lopage_free_limit &&
3229 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3230 mem->lopage = TRUE;
3231 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3232 vm_lopage_free_count++;
3233 queue_free = &vm_lopage_queue_free;
3234 #if CONFIG_SECLUDED_MEMORY
3235 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3236 mem->lopage = FALSE;
3237 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3238 vm_page_secluded_count++;
3239 vm_page_secluded_count_free++;
3240 queue_free = &vm_page_queue_secluded;
3241 #endif /* CONFIG_SECLUDED_MEMORY */
3242 } else {
3243 mem->lopage = FALSE;
3244 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3245 vm_page_free_count++;
3246 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3247 }
3248 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
3249 #if defined(__x86_64__)
3250 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, pageq);
3251 #else
3252 vm_page_queue_enter(queue_free, mem, vm_page_t, pageq);
3253 #endif
3254 } else
3255 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
3256 }
3257
3258 /*
3259 * vm_page_wait:
3260 *
3261 * Wait for a page to become available.
3262 * If there are plenty of free pages, then we don't sleep.
3263 *
3264 * Returns:
3265 * TRUE: There may be another page, try again
3266 * FALSE: We were interrupted out of our wait, don't try again
3267 */
3268
3269 boolean_t
3270 vm_page_wait(
3271 int interruptible )
3272 {
3273 /*
3274 * We can't use vm_page_free_reserved to make this
3275 * determination. Consider: some thread might
3276 * need to allocate two pages. The first allocation
3277 * succeeds, the second fails. After the first page is freed,
3278 * a call to vm_page_wait must really block.
3279 */
3280 kern_return_t wait_result;
3281 int need_wakeup = 0;
3282 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3283
3284 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3285
3286 if (is_privileged && vm_page_free_count) {
3287 lck_mtx_unlock(&vm_page_queue_free_lock);
3288 return TRUE;
3289 }
3290
3291 if (vm_page_free_count >= vm_page_free_target) {
3292 lck_mtx_unlock(&vm_page_queue_free_lock);
3293 return TRUE;
3294 }
3295
3296 if (is_privileged) {
3297 if (vm_page_free_wanted_privileged++ == 0)
3298 need_wakeup = 1;
3299 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3300 #if CONFIG_SECLUDED_MEMORY
3301 } else if (secluded_for_apps &&
3302 task_can_use_secluded_mem(current_task())) {
3303 #if 00
3304 /* XXX FBDP: need pageq lock for this... */
3305 /* XXX FBDP: might wait even if pages available, */
3306 /* XXX FBDP: hopefully not for too long... */
3307 if (vm_page_secluded_count > 0) {
3308 lck_mtx_unlock(&vm_page_queue_free_lock);
3309 return TRUE;
3310 }
3311 #endif
3312 if (vm_page_free_wanted_secluded++ == 0) {
3313 need_wakeup = 1;
3314 }
3315 wait_result = assert_wait(
3316 (event_t)&vm_page_free_wanted_secluded,
3317 interruptible);
3318 #endif /* CONFIG_SECLUDED_MEMORY */
3319 } else {
3320 if (vm_page_free_wanted++ == 0)
3321 need_wakeup = 1;
3322 wait_result = assert_wait((event_t)&vm_page_free_count,
3323 interruptible);
3324 }
3325 lck_mtx_unlock(&vm_page_queue_free_lock);
3326 counter(c_vm_page_wait_block++);
3327
3328 if (need_wakeup)
3329 thread_wakeup((event_t)&vm_page_free_wanted);
3330
3331 if (wait_result == THREAD_WAITING) {
3332 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3333 vm_page_free_wanted_privileged,
3334 vm_page_free_wanted,
3335 #if CONFIG_SECLUDED_MEMORY
3336 vm_page_free_wanted_secluded,
3337 #else /* CONFIG_SECLUDED_MEMORY */
3338 0,
3339 #endif /* CONFIG_SECLUDED_MEMORY */
3340 0);
3341 wait_result = thread_block(THREAD_CONTINUE_NULL);
3342 VM_DEBUG_EVENT(vm_page_wait_block,
3343 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3344 }
3345
3346 return (wait_result == THREAD_AWAKENED);
3347 }
3348
3349 /*
3350 * vm_page_alloc:
3351 *
3352 * Allocate and return a memory cell associated
3353 * with this VM object/offset pair.
3354 *
3355 * Object must be locked.
3356 */
3357
3358 vm_page_t
3359 vm_page_alloc(
3360 vm_object_t object,
3361 vm_object_offset_t offset)
3362 {
3363 vm_page_t mem;
3364 int grab_options;
3365
3366 vm_object_lock_assert_exclusive(object);
3367 grab_options = 0;
3368 #if CONFIG_SECLUDED_MEMORY
3369 if (object->can_grab_secluded) {
3370 grab_options |= VM_PAGE_GRAB_SECLUDED;
3371 }
3372 #endif /* CONFIG_SECLUDED_MEMORY */
3373 mem = vm_page_grab_options(grab_options);
3374 if (mem == VM_PAGE_NULL)
3375 return VM_PAGE_NULL;
3376
3377 vm_page_insert(mem, object, offset);
3378
3379 return(mem);
3380 }
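
/*
 * Illustrative sketch (editorial addition, not compiled): allocating a
 * page for a locked object, dropping the object lock across
 * vm_page_wait() and re-checking residency afterwards, since another
 * thread may have filled the offset while the lock was released.  The
 * helper name is an assumption made for this example, and
 * vm_object_lock()/vm_object_unlock() are assumed to be the usual object
 * lock entry points; the locking requirements themselves come from
 * vm_page_alloc() and vm_page_lookup() above.
 */
#if 0 /* example only */
static vm_page_t
example_alloc_resident_page(
	vm_object_t		object,		/* locked exclusive by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL)
			return m;		/* already resident */

		m = vm_page_alloc(object, offset);
		if (m != VM_PAGE_NULL)
			return m;		/* grabbed and inserted */

		/* no free pages: drop the lock, wait, and retry */
		vm_object_unlock(object);
		if (!vm_page_wait(THREAD_UNINT)) {
			vm_object_lock(object);
			return VM_PAGE_NULL;
		}
		vm_object_lock(object);
	}
}
#endif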
3381
3382 /*
3383 * vm_page_alloc_guard:
3384 *
3385 * Allocate a fictitious page which will be used
3386 * as a guard page. The page will be inserted into
3387 * the object and returned to the caller.
3388 */
3389
3390 vm_page_t
3391 vm_page_alloc_guard(
3392 vm_object_t object,
3393 vm_object_offset_t offset)
3394 {
3395 vm_page_t mem;
3396
3397 vm_object_lock_assert_exclusive(object);
3398 mem = vm_page_grab_guard();
3399 if (mem == VM_PAGE_NULL)
3400 return VM_PAGE_NULL;
3401
3402 vm_page_insert(mem, object, offset);
3403
3404 return(mem);
3405 }
3406
3407
3408 counter(unsigned int c_laundry_pages_freed = 0;)
3409
3410 /*
3411 * vm_page_free_prepare:
3412 *
3413 * Removes page from any queue it may be on
3414 * and disassociates it from its VM object.
3415 *
3416 * Object and page queues must be locked prior to entry.
3417 */
3418 static void
3419 vm_page_free_prepare(
3420 vm_page_t mem)
3421 {
3422 vm_page_free_prepare_queues(mem);
3423 vm_page_free_prepare_object(mem, TRUE);
3424 }
3425
3426
3427 void
3428 vm_page_free_prepare_queues(
3429 vm_page_t mem)
3430 {
3431 vm_object_t m_object;
3432
3433 VM_PAGE_CHECK(mem);
3434
3435 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
3436 assert(!mem->cleaning);
3437 m_object = VM_PAGE_OBJECT(mem);
3438
3439 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3440 if (m_object) {
3441 vm_object_lock_assert_exclusive(m_object);
3442 }
3443 if (mem->laundry) {
3444 /*
3445 * We may have to free a page while it's being laundered
3446 * if we lost its pager (due to a forced unmount, for example).
3447 * We need to call vm_pageout_steal_laundry() before removing
3448 * the page from its VM object, so that we can remove it
3449 * from its pageout queue and adjust the laundry accounting
3450 */
3451 vm_pageout_steal_laundry(mem, TRUE);
3452 counter(++c_laundry_pages_freed);
3453 }
3454
3455 vm_page_queues_remove(mem, TRUE);
3456
3457 if (VM_PAGE_WIRED(mem)) {
3458 assert(mem->wire_count > 0);
3459
3460 if (m_object) {
3461
3462 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3463 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3464 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3465
3466 assert(m_object->resident_page_count >=
3467 m_object->wired_page_count);
3468
3469 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3470 OSAddAtomic(+1, &vm_page_purgeable_count);
3471 assert(vm_page_purgeable_wired_count > 0);
3472 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3473 }
3474 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3475 m_object->purgable == VM_PURGABLE_EMPTY) &&
3476 m_object->vo_purgeable_owner != TASK_NULL) {
3477 task_t owner;
3478
3479 owner = m_object->vo_purgeable_owner;
3480 /*
3481 * While wired, this page was accounted
3482 * as "non-volatile" but it should now
3483 * be accounted as "volatile".
3484 */
3485 /* one less "non-volatile"... */
3486 ledger_debit(owner->ledger,
3487 task_ledgers.purgeable_nonvolatile,
3488 PAGE_SIZE);
3489 /* ... and "phys_footprint" */
3490 ledger_debit(owner->ledger,
3491 task_ledgers.phys_footprint,
3492 PAGE_SIZE);
3493 /* one more "volatile" */
3494 ledger_credit(owner->ledger,
3495 task_ledgers.purgeable_volatile,
3496 PAGE_SIZE);
3497 }
3498 }
3499 if (!mem->private && !mem->fictitious)
3500 vm_page_wire_count--;
3501
3502 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3503 mem->wire_count = 0;
3504 assert(!mem->gobbled);
3505 } else if (mem->gobbled) {
3506 if (!mem->private && !mem->fictitious)
3507 vm_page_wire_count--;
3508 vm_page_gobble_count--;
3509 }
3510 }
3511
3512
3513 void
3514 vm_page_free_prepare_object(
3515 vm_page_t mem,
3516 boolean_t remove_from_hash)
3517 {
3518 if (mem->tabled)
3519 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3520
3521 PAGE_WAKEUP(mem); /* clears wanted */
3522
3523 if (mem->private) {
3524 mem->private = FALSE;
3525 mem->fictitious = TRUE;
3526 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3527 }
3528 if ( !mem->fictitious) {
3529 assert(mem->pageq.next == 0);
3530 assert(mem->pageq.prev == 0);
3531 assert(mem->listq.next == 0);
3532 assert(mem->listq.prev == 0);
3533 #if CONFIG_BACKGROUND_QUEUE
3534 assert(mem->vm_page_backgroundq.next == 0);
3535 assert(mem->vm_page_backgroundq.prev == 0);
3536 #endif /* CONFIG_BACKGROUND_QUEUE */
3537 assert(mem->next_m == 0);
3538 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
3539 }
3540 }
3541
3542
3543 /*
3544 * vm_page_free:
3545 *
3546 * Returns the given page to the free list,
3547 * disassociating it from any VM object.
3548 *
3549 * Object and page queues must be locked prior to entry.
3550 */
3551 void
3552 vm_page_free(
3553 vm_page_t mem)
3554 {
3555 vm_page_free_prepare(mem);
3556
3557 if (mem->fictitious) {
3558 vm_page_release_fictitious(mem);
3559 } else {
3560 vm_page_release(mem,
3561 TRUE); /* page queues are locked */
3562 }
3563 }
3564
3565
3566 void
3567 vm_page_free_unlocked(
3568 vm_page_t mem,
3569 boolean_t remove_from_hash)
3570 {
3571 vm_page_lockspin_queues();
3572 vm_page_free_prepare_queues(mem);
3573 vm_page_unlock_queues();
3574
3575 vm_page_free_prepare_object(mem, remove_from_hash);
3576
3577 if (mem->fictitious) {
3578 vm_page_release_fictitious(mem);
3579 } else {
3580 vm_page_release(mem, FALSE); /* page queues are not locked */
3581 }
3582 }
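
/*
 * Illustrative sketch (editorial addition, not compiled): batching page
 * frees to amortize the free-list lock, as described in the block comment
 * for vm_page_free_list() just below.  Pages are chained through their
 * snext field and handed over in a single call.  The helper name is an
 * assumption made for this example, and the caller is assumed to have
 * already taken each page off its paging queues (see
 * vm_page_free_prepare_queues()).
 */
#if 0 /* example only */
static void
example_free_page_chain(
	vm_page_t	*page_array,
	unsigned int	page_count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < page_count; i++) {
		page_array[i]->snext = local_freeq;
		local_freeq = page_array[i];
	}
	if (local_freeq != VM_PAGE_NULL) {
		/*
		 * TRUE: let vm_page_free_list() disassociate each page
		 * from its object before putting it on the free list
		 */
		vm_page_free_list(local_freeq, TRUE);
	}
}
#endif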
3583
3584
3585 /*
3586 * Free a list of pages. The list can be up to several hundred pages,
3587 * as batched up by vm_pageout_scan().
3588 * The big win is not having to take the free list lock once
3589 * per page.
3590 *
3591 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3592 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3593 */
3594 void
3595 vm_page_free_list(
3596 vm_page_t freeq,
3597 boolean_t prepare_object)
3598 {
3599 vm_page_t mem;
3600 vm_page_t nxt;
3601 vm_page_t local_freeq;
3602 int pg_count;
3603
3604 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3605 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3606
3607 while (freeq) {
3608
3609 pg_count = 0;
3610 local_freeq = VM_PAGE_NULL;
3611 mem = freeq;
3612
3613 /*
3614 * break up the processing into smaller chunks so
3615 * that we can 'pipeline' the pages onto the
3616 * free list without introducing too much
3617 * contention on the global free queue lock
3618 */
3619 while (mem && pg_count < 64) {
3620
3621 assert((mem->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
3622 (mem->vm_page_q_state == VM_PAGE_IS_WIRED));
3623 #if CONFIG_BACKGROUND_QUEUE
3624 assert(mem->vm_page_backgroundq.next == 0 &&
3625 mem->vm_page_backgroundq.prev == 0 &&
3626 mem->vm_page_on_backgroundq == FALSE);
3627 #endif
3628 nxt = mem->snext;
3629 mem->snext = NULL;
3630 assert(mem->pageq.prev == 0);
3631
3632 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
3633 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3634 }
3635 if (prepare_object == TRUE)
3636 vm_page_free_prepare_object(mem, TRUE);
3637
3638 if (!mem->fictitious) {
3639 assert(mem->busy);
3640
3641 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3642 vm_lopage_free_count < vm_lopage_free_limit &&
3643 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3644 vm_page_release(mem, FALSE); /* page queues are not locked */
3645 #if CONFIG_SECLUDED_MEMORY
3646 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3647 num_tasks_can_use_secluded_mem == 0) {
3648 vm_page_release(mem,
3649 FALSE); /* page queues are not locked */
3650 #endif /* CONFIG_SECLUDED_MEMORY */
3651 } else {
3652 /*
3653 * IMPORTANT: we can't set the page "free" here
3654 * because that would make the page eligible for
3655 * a physically-contiguous allocation (see
3656 * vm_page_find_contiguous()) right away (we don't
3657 * hold the vm_page_queue_free lock). That would
3658 * cause trouble because the page is not actually
3659 * in the free queue yet...
3660 */
3661 mem->snext = local_freeq;
3662 local_freeq = mem;
3663 pg_count++;
3664
3665 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3666 }
3667 } else {
3668 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3669 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3670 vm_page_release_fictitious(mem);
3671 }
3672 mem = nxt;
3673 }
3674 freeq = mem;
3675
3676 if ( (mem = local_freeq) ) {
3677 unsigned int avail_free_count;
3678 unsigned int need_wakeup = 0;
3679 unsigned int need_priv_wakeup = 0;
3680 #if CONFIG_SECLUDED_MEMORY
3681 unsigned int need_wakeup_secluded = 0;
3682 #endif /* CONFIG_SECLUDED_MEMORY */
3683
3684 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3685
3686 while (mem) {
3687 int color;
3688
3689 nxt = mem->snext;
3690
3691 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3692 assert(mem->busy);
3693 mem->lopage = FALSE;
3694 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3695
3696 color = VM_PAGE_GET_COLOR(mem);
3697 #if defined(__x86_64__)
3698 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3699 mem,
3700 vm_page_t,
3701 pageq);
3702 #else
3703 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3704 mem,
3705 vm_page_t,
3706 pageq);
3707 #endif
3708 mem = nxt;
3709 }
3710 vm_page_free_count += pg_count;
3711 avail_free_count = vm_page_free_count;
3712
3713 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3714
3715 if (avail_free_count < vm_page_free_wanted_privileged) {
3716 need_priv_wakeup = avail_free_count;
3717 vm_page_free_wanted_privileged -= avail_free_count;
3718 avail_free_count = 0;
3719 } else {
3720 need_priv_wakeup = vm_page_free_wanted_privileged;
3721 avail_free_count -= vm_page_free_wanted_privileged;
3722 vm_page_free_wanted_privileged = 0;
3723 }
3724 }
3725 #if CONFIG_SECLUDED_MEMORY
3726 if (vm_page_free_wanted_secluded > 0 &&
3727 avail_free_count > vm_page_free_reserved) {
3728 unsigned int available_pages;
3729 available_pages = (avail_free_count -
3730 vm_page_free_reserved);
3731 if (available_pages <
3732 vm_page_free_wanted_secluded) {
3733 need_wakeup_secluded = available_pages;
3734 vm_page_free_wanted_secluded -=
3735 available_pages;
3736 avail_free_count -= available_pages;
3737 } else {
3738 need_wakeup_secluded =
3739 vm_page_free_wanted_secluded;
3740 avail_free_count -=
3741 vm_page_free_wanted_secluded;
3742 vm_page_free_wanted_secluded = 0;
3743 }
3744 }
3745 #endif /* CONFIG_SECLUDED_MEMORY */
3746 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3747 unsigned int available_pages;
3748
3749 available_pages = avail_free_count - vm_page_free_reserved;
3750
3751 if (available_pages >= vm_page_free_wanted) {
3752 need_wakeup = vm_page_free_wanted;
3753 vm_page_free_wanted = 0;
3754 } else {
3755 need_wakeup = available_pages;
3756 vm_page_free_wanted -= available_pages;
3757 }
3758 }
3759 lck_mtx_unlock(&vm_page_queue_free_lock);
3760
3761 if (need_priv_wakeup != 0) {
3762 /*
3763 * There shouldn't be that many VM-privileged threads,
3764 * so let's wake them all up, even if we don't quite
3765 * have enough pages to satisfy them all.
3766 */
3767 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3768 }
3769 #if CONFIG_SECLUDED_MEMORY
3770 if (need_wakeup_secluded != 0 &&
3771 vm_page_free_wanted_secluded == 0) {
3772 thread_wakeup((event_t)
3773 &vm_page_free_wanted_secluded);
3774 } else {
3775 for (;
3776 need_wakeup_secluded != 0;
3777 need_wakeup_secluded--) {
3778 thread_wakeup_one(
3779 (event_t)
3780 &vm_page_free_wanted_secluded);
3781 }
3782 }
3783 #endif /* CONFIG_SECLUDED_MEMORY */
3784 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3785 /*
3786 * We don't expect to have any more waiters
3787 * after this, so let's wake them all up at
3788 * once.
3789 */
3790 thread_wakeup((event_t) &vm_page_free_count);
3791 } else for (; need_wakeup != 0; need_wakeup--) {
3792 /*
3793 * Wake up one waiter per page we just released.
3794 */
3795 thread_wakeup_one((event_t) &vm_page_free_count);
3796 }
3797
3798 VM_CHECK_MEMORYSTATUS;
3799 }
3800 }
3801 }
3802
3803
3804 /*
3805 * vm_page_wire:
3806 *
3807 * Mark this page as wired down by yet
3808 * another map, removing it from paging queues
3809 * as necessary.
3810 *
3811 * The page's object and the page queues must be locked.
3812 */
3813
3814
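/*
 *	Informal caller sketch (assumed, not taken from a real call site):
 *
 *		vm_object_lock(object);
 *		vm_page_lockspin_queues();
 *		vm_page_wire(m, VM_KERN_MEMORY_NONE, TRUE);
 *		vm_page_unlock_queues();
 *		vm_object_unlock(object);
 *
 *	The tag is charged to the object's wired-page accounting via
 *	VM_OBJECT_WIRED_PAGE_UPDATE_END() below.
 */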
3815 void
3816 vm_page_wire(
3817 vm_page_t mem,
3818 vm_tag_t tag,
3819 boolean_t check_memorystatus)
3820 {
3821 vm_object_t m_object;
3822
3823 m_object = VM_PAGE_OBJECT(mem);
3824
3825 // dbgLog(current_thread(), mem->offset, m_object, 1); /* (TEST/DEBUG) */
3826
3827 VM_PAGE_CHECK(mem);
3828 if (m_object) {
3829 vm_object_lock_assert_exclusive(m_object);
3830 } else {
3831 /*
3832 * In theory, the page should be in an object before it
3833 * gets wired, since we need to hold the object lock
3834 * to update some fields in the page structure.
3835 * However, some code (i386 pmap, for example) might want
3836 * to wire a page before it gets inserted into an object.
3837 * That's somewhat OK, as long as nobody else can get to
3838 * that page and update it at the same time.
3839 */
3840 }
3841 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3842 if ( !VM_PAGE_WIRED(mem)) {
3843
3844 if (mem->laundry)
3845 vm_pageout_steal_laundry(mem, TRUE);
3846
3847 vm_page_queues_remove(mem, TRUE);
3848
3849 assert(mem->wire_count == 0);
3850 mem->vm_page_q_state = VM_PAGE_IS_WIRED;
3851
3852 if (m_object) {
3853
3854 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3855 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
3856 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3857
3858 assert(m_object->resident_page_count >=
3859 m_object->wired_page_count);
3860 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3861 assert(vm_page_purgeable_count > 0);
3862 OSAddAtomic(-1, &vm_page_purgeable_count);
3863 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3864 }
3865 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3866 m_object->purgable == VM_PURGABLE_EMPTY) &&
3867 m_object->vo_purgeable_owner != TASK_NULL) {
3868 task_t owner;
3869
3870 owner = m_object->vo_purgeable_owner;
3871 /* less volatile bytes */
3872 ledger_debit(owner->ledger,
3873 task_ledgers.purgeable_volatile,
3874 PAGE_SIZE);
3875 /* more not-quite-volatile bytes */
3876 ledger_credit(owner->ledger,
3877 task_ledgers.purgeable_nonvolatile,
3878 PAGE_SIZE);
3879 /* more footprint */
3880 ledger_credit(owner->ledger,
3881 task_ledgers.phys_footprint,
3882 PAGE_SIZE);
3883 }
3884 if (m_object->all_reusable) {
3885 /*
3886 * Wired pages are not counted as "re-usable"
3887 * in "all_reusable" VM objects, so nothing
3888 * to do here.
3889 */
3890 } else if (mem->reusable) {
3891 /*
3892 * This page is not "re-usable" when it's
3893 * wired, so adjust its state and the
3894 * accounting.
3895 */
3896 vm_object_reuse_pages(m_object,
3897 mem->offset,
3898 mem->offset+PAGE_SIZE_64,
3899 FALSE);
3900 }
3901 }
3902 assert(!mem->reusable);
3903
3904 if (!mem->private && !mem->fictitious && !mem->gobbled)
3905 vm_page_wire_count++;
3906 if (mem->gobbled)
3907 vm_page_gobble_count--;
3908 mem->gobbled = FALSE;
3909
3910 if (check_memorystatus == TRUE) {
3911 VM_CHECK_MEMORYSTATUS;
3912 }
3913 }
3914 assert(!mem->gobbled);
3915 assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
3916 mem->wire_count++;
3917 if (__improbable(mem->wire_count == 0)) {
3918 panic("vm_page_wire(%p): wire_count overflow", mem);
3919 }
3920 VM_PAGE_CHECK(mem);
3921 }
3922
3923 /*
3924 * vm_page_unwire:
3925 *
3926 * Release one wiring of this page, potentially
3927 * enabling it to be paged again.
3928 *
3929 * The page's object and the page queues must be locked.
3930 */
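/*
 *	Note on "queueit": when the last wiring is dropped, the page is put
 *	back on a pageable queue only if queueit is TRUE -- deactivated if
 *	its object is VM_PURGABLE_EMPTY, activated otherwise (see below).
 */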
3931 void
3932 vm_page_unwire(
3933 vm_page_t mem,
3934 boolean_t queueit)
3935 {
3936 vm_object_t m_object;
3937
3938 m_object = VM_PAGE_OBJECT(mem);
3939
3940 // dbgLog(current_thread(), mem->offset, m_object, 0); /* (TEST/DEBUG) */
3941
3942 VM_PAGE_CHECK(mem);
3943 assert(VM_PAGE_WIRED(mem));
3944 assert(mem->wire_count > 0);
3945 assert(!mem->gobbled);
3946 assert(m_object != VM_OBJECT_NULL);
3947 vm_object_lock_assert_exclusive(m_object);
3948 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3949 if (--mem->wire_count == 0) {
3950
3951 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3952
3953 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3954 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3955 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3956 if (!mem->private && !mem->fictitious) {
3957 vm_page_wire_count--;
3958 }
3959
3960 assert(m_object->resident_page_count >=
3961 m_object->wired_page_count);
3962 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3963 OSAddAtomic(+1, &vm_page_purgeable_count);
3964 assert(vm_page_purgeable_wired_count > 0);
3965 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3966 }
3967 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3968 m_object->purgable == VM_PURGABLE_EMPTY) &&
3969 m_object->vo_purgeable_owner != TASK_NULL) {
3970 task_t owner;
3971
3972 owner = m_object->vo_purgeable_owner;
3973 /* more volatile bytes */
3974 ledger_credit(owner->ledger,
3975 task_ledgers.purgeable_volatile,
3976 PAGE_SIZE);
3977 /* less not-quite-volatile bytes */
3978 ledger_debit(owner->ledger,
3979 task_ledgers.purgeable_nonvolatile,
3980 PAGE_SIZE);
3981 /* less footprint */
3982 ledger_debit(owner->ledger,
3983 task_ledgers.phys_footprint,
3984 PAGE_SIZE);
3985 }
3986 assert(m_object != kernel_object);
3987 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3988
3989 if (queueit == TRUE) {
3990 if (m_object->purgable == VM_PURGABLE_EMPTY) {
3991 vm_page_deactivate(mem);
3992 } else {
3993 vm_page_activate(mem);
3994 }
3995 }
3996
3997 VM_CHECK_MEMORYSTATUS;
3998
3999 }
4000 VM_PAGE_CHECK(mem);
4001 }
4002
4003 /*
4004 * vm_page_deactivate:
4005 *
4006 * Returns the given page to the inactive list,
4007 * indicating that no physical maps have access
4008 * to this page. [Used by the physical mapping system.]
4009 *
4010 * The page queues must be locked.
4011 */
4012 void
4013 vm_page_deactivate(
4014 vm_page_t m)
4015 {
4016 vm_page_deactivate_internal(m, TRUE);
4017 }
4018
4019
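/*
 *	vm_page_deactivate_internal:
 *
 *	Worker for vm_page_deactivate().  When clear_hw_reference is TRUE,
 *	the pmap-level reference bit of a resident (non-absent) page is
 *	cleared before the page is re-queued.
 */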
4020 void
4021 vm_page_deactivate_internal(
4022 vm_page_t m,
4023 boolean_t clear_hw_reference)
4024 {
4025 vm_object_t m_object;
4026
4027 m_object = VM_PAGE_OBJECT(m);
4028
4029 VM_PAGE_CHECK(m);
4030 assert(m_object != kernel_object);
4031 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4032
4033 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4034 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4035 /*
4036 * This page is no longer very interesting. If it was
4037 * interesting (active or inactive/referenced), then we
4038 * clear the reference bit and (re)enter it in the
4039 * inactive queue. Note wired pages should not have
4040 * their reference bit cleared.
4041 */
4042 assert ( !(m->absent && !m->unusual));
4043
4044 if (m->gobbled) { /* can this happen? */
4045 assert( !VM_PAGE_WIRED(m));
4046
4047 if (!m->private && !m->fictitious)
4048 vm_page_wire_count--;
4049 vm_page_gobble_count--;
4050 m->gobbled = FALSE;
4051 }
4052 /*
4053 * if this page is currently on the pageout queue, we can't do the
4054 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4055 * and we can't remove it manually since we would need the object lock
4056 * (which is not required here) to decrement the activity_in_progress
4057 * reference which is held on the object while the page is in the pageout queue...
4058 * just let the normal laundry processing proceed
4059 */
4060 if (m->laundry || m->private || m->fictitious ||
4061 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4062 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4063 VM_PAGE_WIRED(m)) {
4064 return;
4065 }
4066 if (!m->absent && clear_hw_reference == TRUE)
4067 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4068
4069 m->reference = FALSE;
4070 m->no_cache = FALSE;
4071
4072 if ( !VM_PAGE_INACTIVE(m)) {
4073 vm_page_queues_remove(m, FALSE);
4074
4075 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4076 m->dirty && m_object->internal &&
4077 (m_object->purgable == VM_PURGABLE_DENY ||
4078 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4079 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4080 vm_page_check_pageable_safe(m);
4081 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4082 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4083 vm_page_throttled_count++;
4084 } else {
4085 if (m_object->named && m_object->ref_count == 1) {
4086 vm_page_speculate(m, FALSE);
4087 #if DEVELOPMENT || DEBUG
4088 vm_page_speculative_recreated++;
4089 #endif
4090 } else {
4091 vm_page_enqueue_inactive(m, FALSE);
4092 }
4093 }
4094 }
4095 }
4096
4097 /*
4098 * vm_page_enqueue_cleaned
4099 *
4100 * Put the page on the cleaned queue, mark it cleaned, etc.
4101 * Being on the cleaned queue (and having m->clean_queue set)
4102 * does ** NOT ** guarantee that the page is clean!
4103 *
4104 * Call with the queues lock held.
4105 */
4106
4107 void vm_page_enqueue_cleaned(vm_page_t m)
4108 {
4109 vm_object_t m_object;
4110
4111 m_object = VM_PAGE_OBJECT(m);
4112
4113 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4114 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4115 assert( !(m->absent && !m->unusual));
4116
4117 if (VM_PAGE_WIRED(m)) {
4118 return;
4119 }
4120
4121 if (m->gobbled) {
4122 if (!m->private && !m->fictitious)
4123 vm_page_wire_count--;
4124 vm_page_gobble_count--;
4125 m->gobbled = FALSE;
4126 }
4127 /*
4128 * if this page is currently on the pageout queue, we can't do the
4129 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4130 * and we can't remove it manually since we would need the object lock
4131 * (which is not required here) to decrement the activity_in_progress
4132 * reference which is held on the object while the page is in the pageout queue...
4133 * just let the normal laundry processing proceed
4134 */
4135 if (m->laundry || m->private || m->fictitious ||
4136 (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4137 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4138 return;
4139 }
4140 vm_page_queues_remove(m, FALSE);
4141
4142 vm_page_check_pageable_safe(m);
4143 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
4144 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4145 vm_page_cleaned_count++;
4146
4147 vm_page_inactive_count++;
4148 if (m_object->internal) {
4149 vm_page_pageable_internal_count++;
4150 } else {
4151 vm_page_pageable_external_count++;
4152 }
4153 #if CONFIG_BACKGROUND_QUEUE
4154 if (m->vm_page_in_background)
4155 vm_page_add_to_backgroundq(m, TRUE);
4156 #endif
4157 vm_pageout_enqueued_cleaned++;
4158 }
4159
4160 /*
4161 * vm_page_activate:
4162 *
4163 * Put the specified page on the active list (if appropriate).
4164 *
4165 * The page queues must be locked.
4166 */
4167
4168 void
4169 vm_page_activate(
4170 vm_page_t m)
4171 {
4172 vm_object_t m_object;
4173
4174 m_object = VM_PAGE_OBJECT(m);
4175
4176 VM_PAGE_CHECK(m);
4177 #ifdef FIXME_4778297
4178 assert(m_object != kernel_object);
4179 #endif
4180 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4181 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4182 assert( !(m->absent && !m->unusual));
4183
4184 if (m->gobbled) {
4185 assert( !VM_PAGE_WIRED(m));
4186 if (!m->private && !m->fictitious)
4187 vm_page_wire_count--;
4188 vm_page_gobble_count--;
4189 m->gobbled = FALSE;
4190 }
4191 /*
4192 * if this page is currently on the pageout queue, we can't do the
4193 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4194 * and we can't remove it manually since we would need the object lock
4195 * (which is not required here) to decrement the activity_in_progress
4196 * reference which is held on the object while the page is in the pageout queue...
4197 * just let the normal laundry processing proceed
4198 */
4199 if (m->laundry || m->private || m->fictitious ||
4200 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4201 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4202 return;
4203
4204 #if DEBUG
4205 if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
4206 panic("vm_page_activate: already active");
4207 #endif
4208
4209 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4210 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4211 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4212 }
4213
4214 vm_page_queues_remove(m, FALSE);
4215
4216 if ( !VM_PAGE_WIRED(m)) {
4217 vm_page_check_pageable_safe(m);
4218 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4219 m->dirty && m_object->internal &&
4220 (m_object->purgable == VM_PURGABLE_DENY ||
4221 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4222 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4223 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4224 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4225 vm_page_throttled_count++;
4226 } else {
4227 #if CONFIG_SECLUDED_MEMORY
4228 if (secluded_for_filecache &&
4229 vm_page_secluded_target != 0 &&
4230 num_tasks_can_use_secluded_mem == 0 &&
4231 m_object->eligible_for_secluded) {
4232 vm_page_queue_enter(&vm_page_queue_secluded, m,
4233 vm_page_t, pageq);
4234 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
4235 vm_page_secluded_count++;
4236 vm_page_secluded_count_inuse++;
4237 assert(!m_object->internal);
4238 // vm_page_pageable_external_count++;
4239 } else
4240 #endif /* CONFIG_SECLUDED_MEMORY */
4241 vm_page_enqueue_active(m, FALSE);
4242 }
4243 m->reference = TRUE;
4244 m->no_cache = FALSE;
4245 }
4246 VM_PAGE_CHECK(m);
4247 }
4248
4249
4250 /*
4251 * vm_page_speculate:
4252 *
4253 * Put the specified page on the speculative list (if appropriate).
4254 *
4255 * The page queues must be locked.
4256 */
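/*
 *	Speculative pages are grouped into aging bins: pages speculated
 *	within the same vm_page_speculative_q_age_ms window share a bin.
 *	When the window expires the bin index advances (wrapping between
 *	VM_PAGE_MIN_SPECULATIVE_AGE_Q and VM_PAGE_MAX_SPECULATIVE_AGE_Q),
 *	and a bin about to be reused is drained into the "aged" bin via
 *	vm_page_speculate_ageit().  For illustration only: if the window
 *	were 500 ms, pages speculated at t=0 and t=400 ms would age out
 *	together, while one speculated at t=600 ms would land in the next bin.
 */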
4257 void
4258 vm_page_speculate(
4259 vm_page_t m,
4260 boolean_t new)
4261 {
4262 struct vm_speculative_age_q *aq;
4263 vm_object_t m_object;
4264
4265 m_object = VM_PAGE_OBJECT(m);
4266
4267 VM_PAGE_CHECK(m);
4268 vm_page_check_pageable_safe(m);
4269
4270 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4271 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4272 assert( !(m->absent && !m->unusual));
4273 assert(m_object->internal == FALSE);
4274
4275 /*
4276 * if this page is currently on the pageout queue, we can't do the
4277 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4278 * and we can't remove it manually since we would need the object lock
4279 * (which is not required here) to decrement the activity_in_progress
4280 * reference which is held on the object while the page is in the pageout queue...
4281 * just let the normal laundry processing proceed
4282 */
4283 if (m->laundry || m->private || m->fictitious ||
4284 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4285 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4286 return;
4287
4288 vm_page_queues_remove(m, FALSE);
4289
4290 if ( !VM_PAGE_WIRED(m)) {
4291 mach_timespec_t ts;
4292 clock_sec_t sec;
4293 clock_nsec_t nsec;
4294
4295 clock_get_system_nanotime(&sec, &nsec);
4296 ts.tv_sec = (unsigned int) sec;
4297 ts.tv_nsec = nsec;
4298
4299 if (vm_page_speculative_count == 0) {
4300
4301 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4302 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4303
4304 aq = &vm_page_queue_speculative[speculative_age_index];
4305
4306 /*
4307 * set the timer to begin a new group
4308 */
4309 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4310 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4311
4312 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4313 } else {
4314 aq = &vm_page_queue_speculative[speculative_age_index];
4315
4316 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4317
4318 speculative_age_index++;
4319
4320 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4321 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4322 if (speculative_age_index == speculative_steal_index) {
4323 speculative_steal_index = speculative_age_index + 1;
4324
4325 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4326 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4327 }
4328 aq = &vm_page_queue_speculative[speculative_age_index];
4329
4330 if (!vm_page_queue_empty(&aq->age_q))
4331 vm_page_speculate_ageit(aq);
4332
4333 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4334 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4335
4336 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4337 }
4338 }
4339 vm_page_enqueue_tail(&aq->age_q, &m->pageq);
4340 m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4341 vm_page_speculative_count++;
4342 vm_page_pageable_external_count++;
4343
4344 if (new == TRUE) {
4345 vm_object_lock_assert_exclusive(m_object);
4346
4347 m_object->pages_created++;
4348 #if DEVELOPMENT || DEBUG
4349 vm_page_speculative_created++;
4350 #endif
4351 }
4352 }
4353 VM_PAGE_CHECK(m);
4354 }
4355
4356
4357 /*
4358 * move pages from the specified aging bin to
4359 * the speculative bin that pageout_scan claims from
4360 *
4361 * The page queues must be locked.
4362 */
4363 void
4364 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4365 {
4366 struct vm_speculative_age_q *sq;
4367 vm_page_t t;
4368
4369 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4370
4371 if (vm_page_queue_empty(&sq->age_q)) {
4372 sq->age_q.next = aq->age_q.next;
4373 sq->age_q.prev = aq->age_q.prev;
4374
4375 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4376 t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4377
4378 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4379 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4380 } else {
4381 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4382 t->pageq.next = aq->age_q.next;
4383
4384 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4385 t->pageq.prev = sq->age_q.prev;
4386
4387 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4388 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4389
4390 sq->age_q.prev = aq->age_q.prev;
4391 }
4392 vm_page_queue_init(&aq->age_q);
4393 }
4394
4395
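/*
 *	vm_page_lru:
 *
 *	Re-enter the page on the inactive queue, refreshing its LRU
 *	position and clearing no_cache.  Wired, laundry, private,
 *	compressor and pageout-queue pages are left untouched.
 *
 *	The page queues must be locked.
 */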
4396 void
4397 vm_page_lru(
4398 vm_page_t m)
4399 {
4400 VM_PAGE_CHECK(m);
4401 assert(VM_PAGE_OBJECT(m) != kernel_object);
4402 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4403
4404 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4405 /*
4406 * if this page is currently on the pageout queue, we can't do the
4407 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4408 * and we can't remove it manually since we would need the object lock
4409 * (which is not required here) to decrement the activity_in_progress
4410 * reference which is held on the object while the page is in the pageout queue...
4411 * just let the normal laundry processing proceed
4412 */
4413 if (m->laundry || m->private ||
4414 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4415 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4416 VM_PAGE_WIRED(m))
4417 return;
4418
4419 m->no_cache = FALSE;
4420
4421 vm_page_queues_remove(m, FALSE);
4422
4423 vm_page_enqueue_inactive(m, FALSE);
4424 }
4425
4426
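/*
 *	vm_page_reactivate_all_throttled:
 *
 *	Splice the entire throttled queue onto the head of the active
 *	queue and fix up the global page counts.  Does nothing when
 *	dynamic paging is disabled.
 */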
4427 void
4428 vm_page_reactivate_all_throttled(void)
4429 {
4430 vm_page_t first_throttled, last_throttled;
4431 vm_page_t first_active;
4432 vm_page_t m;
4433 int extra_active_count;
4434 int extra_internal_count, extra_external_count;
4435 vm_object_t m_object;
4436
4437 if (!VM_DYNAMIC_PAGING_ENABLED())
4438 return;
4439
4440 extra_active_count = 0;
4441 extra_internal_count = 0;
4442 extra_external_count = 0;
4443 vm_page_lock_queues();
4444 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4445 /*
4446 * Switch "throttled" pages to "active".
4447 */
4448 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
4449 VM_PAGE_CHECK(m);
4450 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
4451
4452 m_object = VM_PAGE_OBJECT(m);
4453
4454 extra_active_count++;
4455 if (m_object->internal) {
4456 extra_internal_count++;
4457 } else {
4458 extra_external_count++;
4459 }
4460
4461 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4462 VM_PAGE_CHECK(m);
4463 #if CONFIG_BACKGROUND_QUEUE
4464 if (m->vm_page_in_background)
4465 vm_page_add_to_backgroundq(m, FALSE);
4466 #endif
4467 }
4468
4469 /*
4470 * Transfer the entire throttled queue to the regular LRU page queues.
4471 * We insert it at the head of the active queue, so that these pages
4472 * get re-evaluated by the LRU algorithm first, since they've been
4473 * completely out of it until now.
4474 */
4475 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4476 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4477 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4478 if (vm_page_queue_empty(&vm_page_queue_active)) {
4479 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4480 } else {
4481 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4482 }
4483 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4484 first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4485 last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4486
4487 #if DEBUG
4488 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4489 #endif
4490 vm_page_queue_init(&vm_page_queue_throttled);
4491 /*
4492 * Adjust the global page counts.
4493 */
4494 vm_page_active_count += extra_active_count;
4495 vm_page_pageable_internal_count += extra_internal_count;
4496 vm_page_pageable_external_count += extra_external_count;
4497 vm_page_throttled_count = 0;
4498 }
4499 assert(vm_page_throttled_count == 0);
4500 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4501 vm_page_unlock_queues();
4502 }
4503
4504
4505 /*
4506 * move pages from the indicated local queue to the global active queue
4507 * it's OK to fail if we're below the hard limit and force == FALSE
4508 * the nolocks == TRUE case is to allow this function to be run on
4509 * the hibernate path
4510 */
4511
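/*
 *	Informal note: "lid" indexes the per-CPU local queues in
 *	vm_page_local_q[]; the check below panics if a page's local_id
 *	does not match it.  The nolocks == TRUE case (hibernate path) is
 *	presumed to run when taking the queue locks is unnecessary.
 */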
4512 void
4513 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4514 {
4515 struct vpl *lq;
4516 vm_page_t first_local, last_local;
4517 vm_page_t first_active;
4518 vm_page_t m;
4519 uint32_t count = 0;
4520
4521 if (vm_page_local_q == NULL)
4522 return;
4523
4524 lq = &vm_page_local_q[lid].vpl_un.vpl;
4525
4526 if (nolocks == FALSE) {
4527 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4528 if ( !vm_page_trylockspin_queues())
4529 return;
4530 } else
4531 vm_page_lockspin_queues();
4532
4533 VPL_LOCK(&lq->vpl_lock);
4534 }
4535 if (lq->vpl_count) {
4536 /*
4537 * Switch "local" pages to "active".
4538 */
4539 assert(!vm_page_queue_empty(&lq->vpl_queue));
4540
4541 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
4542 VM_PAGE_CHECK(m);
4543 vm_page_check_pageable_safe(m);
4544 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4545 assert(!m->fictitious);
4546
4547 if (m->local_id != lid)
4548 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4549
4550 m->local_id = 0;
4551 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4552 VM_PAGE_CHECK(m);
4553 #if CONFIG_BACKGROUND_QUEUE
4554 if (m->vm_page_in_background)
4555 vm_page_add_to_backgroundq(m, FALSE);
4556 #endif
4557 count++;
4558 }
4559 if (count != lq->vpl_count)
4560 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4561
4562 /*
4563 * Transfer the entire local queue to the regular LRU page queues.
4564 */
4565 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4566 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4567 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4568
4569 if (vm_page_queue_empty(&vm_page_queue_active)) {
4570 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4571 } else {
4572 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4573 }
4574 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4575 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4576 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4577
4578 vm_page_queue_init(&lq->vpl_queue);
4579 /*
4580 * Adjust the global page counts.
4581 */
4582 vm_page_active_count += lq->vpl_count;
4583 vm_page_pageable_internal_count += lq->vpl_internal_count;
4584 vm_page_pageable_external_count += lq->vpl_external_count;
4585 lq->vpl_count = 0;
4586 lq->vpl_internal_count = 0;
4587 lq->vpl_external_count = 0;
4588 }
4589 assert(vm_page_queue_empty(&lq->vpl_queue));
4590
4591 if (nolocks == FALSE) {
4592 VPL_UNLOCK(&lq->vpl_lock);
4593 vm_page_unlock_queues();
4594 }
4595 }
4596
4597 /*
4598 * vm_page_part_zero_fill:
4599 *
4600 * Zero-fill a part of the page.
4601 */
4602 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4603 void
4604 vm_page_part_zero_fill(
4605 vm_page_t m,
4606 vm_offset_t m_pa,
4607 vm_size_t len)
4608 {
4609
4610 #if 0
4611 /*
4612 * we don't hold the page queue lock
4613 * so this check isn't safe to make
4614 */
4615 VM_PAGE_CHECK(m);
4616 #endif
4617
4618 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4619 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4620 #else
4621 vm_page_t tmp;
4622 while (1) {
4623 tmp = vm_page_grab();
4624 if (tmp == VM_PAGE_NULL) {
4625 vm_page_wait(THREAD_UNINT);
4626 continue;
4627 }
4628 break;
4629 }
4630 vm_page_zero_fill(tmp);
4631 if(m_pa != 0) {
4632 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4633 }
4634 if((m_pa + len) < PAGE_SIZE) {
4635 vm_page_part_copy(m, m_pa + len, tmp,
4636 m_pa + len, PAGE_SIZE - (m_pa + len));
4637 }
4638 vm_page_copy(tmp,m);
4639 VM_PAGE_FREE(tmp);
4640 #endif
4641
4642 }
4643
4644 /*
4645 * vm_page_zero_fill:
4646 *
4647 * Zero-fill the specified page.
4648 */
4649 void
4650 vm_page_zero_fill(
4651 vm_page_t m)
4652 {
4653 XPR(XPR_VM_PAGE,
4654 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4655 VM_PAGE_OBJECT(m), m->offset, m, 0,0);
4656 #if 0
4657 /*
4658 * we don't hold the page queue lock
4659 * so this check isn't safe to make
4660 */
4661 VM_PAGE_CHECK(m);
4662 #endif
4663
4664 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4665 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4666 }
4667
4668 /*
4669 * vm_page_part_copy:
4670 *
4671 * copy part of one page to another
4672 */
4673
4674 void
4675 vm_page_part_copy(
4676 vm_page_t src_m,
4677 vm_offset_t src_pa,
4678 vm_page_t dst_m,
4679 vm_offset_t dst_pa,
4680 vm_size_t len)
4681 {
4682 #if 0
4683 /*
4684 * we don't hold the page queue lock
4685 * so this check isn't safe to make
4686 */
4687 VM_PAGE_CHECK(src_m);
4688 VM_PAGE_CHECK(dst_m);
4689 #endif
4690 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4691 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4692 }
4693
4694 /*
4695 * vm_page_copy:
4696 *
4697 * Copy one page to another
4698 */
4699
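/*
 * vm_page_copy_cs_validations counts copies from code-signed objects that
 * forced a vm_page_validate_cs() of the source page; vm_page_copy_cs_tainted
 * counts copies whose source page was marked cs_tainted.
 */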
4700 int vm_page_copy_cs_validations = 0;
4701 int vm_page_copy_cs_tainted = 0;
4702
4703 void
4704 vm_page_copy(
4705 vm_page_t src_m,
4706 vm_page_t dest_m)
4707 {
4708 vm_object_t src_m_object;
4709
4710 src_m_object = VM_PAGE_OBJECT(src_m);
4711
4712 XPR(XPR_VM_PAGE,
4713 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4714 src_m_object, src_m->offset,
4715 VM_PAGE_OBJECT(dest_m), dest_m->offset,
4716 0);
4717 #if 0
4718 /*
4719 * we don't hold the page queue lock
4720 * so this check isn't safe to make
4721 */
4722 VM_PAGE_CHECK(src_m);
4723 VM_PAGE_CHECK(dest_m);
4724 #endif
4725 vm_object_lock_assert_held(src_m_object);
4726
4727 if (src_m_object != VM_OBJECT_NULL &&
4728 src_m_object->code_signed) {
4729 /*
4730 * We're copying a page from a code-signed object.
4731 * Whoever ends up mapping the copy page might care about
4732 * the original page's integrity, so let's validate the
4733 * source page now.
4734 */
4735 vm_page_copy_cs_validations++;
4736 vm_page_validate_cs(src_m);
4737 #if DEVELOPMENT || DEBUG
4738 DTRACE_VM4(codesigned_copy,
4739 vm_object_t, src_m_object,
4740 vm_object_offset_t, src_m->offset,
4741 int, src_m->cs_validated,
4742 int, src_m->cs_tainted);
4743 #endif /* DEVELOPMENT || DEBUG */
4744
4745 }
4746
4747 if (vm_page_is_slideable(src_m)) {
4748 boolean_t was_busy = src_m->busy;
4749 src_m->busy = TRUE;
4750 (void) vm_page_slide(src_m, 0);
4751 assert(src_m->busy);
4752 if (!was_busy) {
4753 PAGE_WAKEUP_DONE(src_m);
4754 }
4755 }
4756
4757 /*
4758 * Propagate the cs_tainted bit to the copy page. Do not propagate
4759 * the cs_validated bit.
4760 */
4761 dest_m->cs_tainted = src_m->cs_tainted;
4762 if (dest_m->cs_tainted) {
4763 vm_page_copy_cs_tainted++;
4764 }
4765 dest_m->slid = src_m->slid;
4766 dest_m->error = src_m->error; /* sliding src_m might have failed... */
4767 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4768 }
4769
4770 #if MACH_ASSERT
4771 static void
4772 _vm_page_print(
4773 vm_page_t p)
4774 {
4775 printf("vm_page %p: \n", p);
4776 printf(" pageq: next=%p prev=%p\n",
4777 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
4778 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
4779 printf(" listq: next=%p prev=%p\n",
4780 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
4781 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
4782 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
4783 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
4784 printf(" wire_count=%u\n", p->wire_count);
4785 printf(" q_state=%u\n", p->vm_page_q_state);
4786
4787 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4788 (p->laundry ? "" : "!"),
4789 (p->reference ? "" : "!"),
4790 (p->gobbled ? "" : "!"),
4791 (p->private ? "" : "!"));
4792 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4793 (p->busy ? "" : "!"),
4794 (p->wanted ? "" : "!"),
4795 (p->tabled ? "" : "!"),
4796 (p->fictitious ? "" : "!"),
4797 (p->pmapped ? "" : "!"),
4798 (p->wpmapped ? "" : "!"));
4799 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4800 (p->free_when_done ? "" : "!"),
4801 (p->absent ? "" : "!"),
4802 (p->error ? "" : "!"),
4803 (p->dirty ? "" : "!"),
4804 (p->cleaning ? "" : "!"),
4805 (p->precious ? "" : "!"),
4806 (p->clustered ? "" : "!"));
4807 printf(" %soverwriting, %srestart, %sunusual\n",
4808 (p->overwriting ? "" : "!"),
4809 (p->restart ? "" : "!"),
4810 (p->unusual ? "" : "!"));
4811 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4812 (p->cs_validated ? "" : "!"),
4813 (p->cs_tainted ? "" : "!"),
4814 (p->cs_nx ? "" : "!"),
4815 (p->no_cache ? "" : "!"));
4816
4817 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
4818 }
4819
4820 /*
4821 * Check that the list of pages is ordered by
4822 * ascending physical address and has no holes.
4823 */
4824 static int
4825 vm_page_verify_contiguous(
4826 vm_page_t pages,
4827 unsigned int npages)
4828 {
4829 vm_page_t m;
4830 unsigned int page_count;
4831 vm_offset_t prev_addr;
4832
4833 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
4834 page_count = 1;
4835 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
4836 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4837 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
4838 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
4839 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
4840 panic("vm_page_verify_contiguous: not contiguous!");
4841 }
4842 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4843 ++page_count;
4844 }
4845 if (page_count != npages) {
4846 printf("pages %p actual count 0x%x but requested 0x%x\n",
4847 pages, page_count, npages);
4848 panic("vm_page_verify_contiguous: count error");
4849 }
4850 return 1;
4851 }
4852
4853
4854 /*
4855 * Check the free lists for proper length etc.
4856 */
4857 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4858 static unsigned int
4859 vm_page_verify_free_list(
4860 vm_page_queue_head_t *vm_page_queue,
4861 unsigned int color,
4862 vm_page_t look_for_page,
4863 boolean_t expect_page)
4864 {
4865 unsigned int npages;
4866 vm_page_t m;
4867 vm_page_t prev_m;
4868 boolean_t found_page;
4869
4870 if (! vm_page_verify_this_free_list_enabled)
4871 return 0;
4872
4873 found_page = FALSE;
4874 npages = 0;
4875 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4876
4877 vm_page_queue_iterate(vm_page_queue,
4878 m,
4879 vm_page_t,
4880 pageq) {
4881
4882 if (m == look_for_page) {
4883 found_page = TRUE;
4884 }
4885 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
4886 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4887 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
4888 if ( ! m->busy )
4889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4890 color, npages, m);
4891 if (color != (unsigned int) -1) {
4892 if (VM_PAGE_GET_COLOR(m) != color)
4893 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4894 color, npages, m, VM_PAGE_GET_COLOR(m), color);
4895 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4896 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4897 color, npages, m, m->vm_page_q_state);
4898 } else {
4899 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4900 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4901 npages, m, m->vm_page_q_state);
4902 }
4903 ++npages;
4904 prev_m = m;
4905 }
4906 if (look_for_page != VM_PAGE_NULL) {
4907 unsigned int other_color;
4908
4909 if (expect_page && !found_page) {
4910 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4911 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4912 _vm_page_print(look_for_page);
4913 for (other_color = 0;
4914 other_color < vm_colors;
4915 other_color++) {
4916 if (other_color == color)
4917 continue;
4918 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
4919 other_color, look_for_page, FALSE);
4920 }
4921 if (color == (unsigned int) -1) {
4922 vm_page_verify_free_list(&vm_lopage_queue_free,
4923 (unsigned int) -1, look_for_page, FALSE);
4924 }
4925 panic("vm_page_verify_free_list(color=%u)\n", color);
4926 }
4927 if (!expect_page && found_page) {
4928 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4929 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4930 }
4931 }
4932 return npages;
4933 }
4934
4935 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4936 static void
4937 vm_page_verify_free_lists( void )
4938 {
4939 unsigned int color, npages, nlopages;
4940 boolean_t toggle = TRUE;
4941
4942 if (! vm_page_verify_all_free_lists_enabled)
4943 return;
4944
4945 npages = 0;
4946
4947 lck_mtx_lock(&vm_page_queue_free_lock);
4948
4949 if (vm_page_verify_this_free_list_enabled == TRUE) {
4950 /*
4951 * This variable has been set globally for extra checking of
4952 * each free list Q. Since we didn't set it, we don't own it
4953 * and we shouldn't toggle it.
4954 */
4955 toggle = FALSE;
4956 }
4957
4958 if (toggle == TRUE) {
4959 vm_page_verify_this_free_list_enabled = TRUE;
4960 }
4961
4962 for( color = 0; color < vm_colors; color++ ) {
4963 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
4964 color, VM_PAGE_NULL, FALSE);
4965 }
4966 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4967 (unsigned int) -1,
4968 VM_PAGE_NULL, FALSE);
4969 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4970 panic("vm_page_verify_free_lists: "
4971 "npages %u free_count %d nlopages %u lo_free_count %u",
4972 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4973
4974 if (toggle == TRUE) {
4975 vm_page_verify_this_free_list_enabled = FALSE;
4976 }
4977
4978 lck_mtx_unlock(&vm_page_queue_free_lock);
4979 }
4980
4981 #endif /* MACH_ASSERT */
4982
4983
4984
4985 #if __arm64__
4986 /*
4987 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
4988 * after the system has 'aged'. To ensure that other allocation requests don't mess
4989 * with the chances of that request being satisfied, we pre-allocate a single contiguous
4990 * 10MB buffer and hand it out to the first request of >= 4MB.
4991 */
4992
4993 kern_return_t cpm_preallocate_early(void);
4994
4995 vm_page_t cpm_preallocated_pages_list = NULL;
4996 boolean_t preallocated_buffer_available = FALSE;
4997
4998 #define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
4999 #define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 *1024) / PAGE_SIZE_64) /* 4 MB */
5000
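/*
 *	For scale (the page size is an assumption here, not taken from this
 *	file): with 16 KB pages the 10 MB buffer is 640 pages and the 4 MB
 *	threshold is 256 pages; with 4 KB pages, 2560 and 1024 pages
 *	respectively.
 */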
5001 kern_return_t
5002 cpm_preallocate_early(void)
5003 {
5004
5005 kern_return_t kr = KERN_SUCCESS;
5006 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5007
5008 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5009
5010 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5011
5012 if (kr != KERN_SUCCESS) {
5013 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5014 } else {
5015 preallocated_buffer_available = TRUE;
5016 }
5017
5018 return kr;
5019 }
5020 #endif /* __arm64__ */
5021
5022
5023 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5024
5025 /*
5026 * CONTIGUOUS PAGE ALLOCATION
5027 *
5028 * Find a region large enough to contain at least n pages
5029 * of contiguous physical memory.
5030 *
5031 * This is done by traversing the vm_page_t array in a linear fashion
5032 * we assume that the vm_page_t array has the available physical pages in an
5033 * ordered, ascending list... this is currently true of all our implementations
5034 * and must remain so... there can be 'holes' in the array... we also can
5035 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
5036 * which used to happen via 'vm_page_convert'... that function was no longer
5037 * being called and was removed...
5038 *
5039 * The basic flow consists of stabilizing some of the interesting state of
5040 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5041 * sweep at the beginning of the array looking for pages that meet our criteria
5042 * for a 'stealable' page... currently we are pretty conservative... if the page
5043 * meets these criteria and is physically contiguous to the previous page in the 'run'
5044 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5045 * and start to develop a new run... if at this point we've already considered
5046 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5047 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5048 * to other threads trying to acquire free pages (or move pages from q to q),
5049 * and then continue from the spot we left off... we only make 1 pass through the
5050 * array. Once we have a 'run' that is long enough, we'll go into the loop
5051 * which steals the pages from the queues they're currently on... pages on the free
5052 * queue can be stolen directly... pages that are on any of the other queues
5053 * must be removed from the object they are tabled on... this requires taking the
5054 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5055 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5056 * dump the pages we've currently stolen back to the free list, and pick up our
5057 * scan from the point where we aborted the 'current' run.
5058 *
5059 *
5060 * Requirements:
5061 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5062 *
5063 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5064 *
5065 * Algorithm:
5066 */
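/*
 *	Simplified outline of the flow described above (informal; the real
 *	code below also handles yielding, wrapping and abort/retry):
 *
 *		lock page queues + free list;
 *		for each vm_pages[] entry:
 *			if the page extends the current physically-contiguous,
 *			    stealable run, grow the run;
 *			else reset the run;
 *			if run length == contig_pages, stop scanning;
 *		pass 1: pull the run's free pages off the free queues;
 *		pass 2 (back to front): steal the remaining pages from their
 *		    objects, substituting fresh pages for pmapped/dirty ones;
 *		wire or gobble the resulting page list and return it.
 */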
5067
5068 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5069
5070
5071 #define RESET_STATE_OF_RUN() \
5072 MACRO_BEGIN \
5073 prevcontaddr = -2; \
5074 start_pnum = -1; \
5075 free_considered = 0; \
5076 substitute_needed = 0; \
5077 npages = 0; \
5078 MACRO_END
5079
5080 /*
5081 * Can we steal in-use (i.e. not free) pages when searching for
5082 * physically-contiguous pages ?
5083 */
5084 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5085
5086 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5087 #if DEBUG
5088 int vm_page_find_contig_debug = 0;
5089 #endif
5090
5091 static vm_page_t
5092 vm_page_find_contiguous(
5093 unsigned int contig_pages,
5094 ppnum_t max_pnum,
5095 ppnum_t pnum_mask,
5096 boolean_t wire,
5097 int flags)
5098 {
5099 vm_page_t m = NULL;
5100 ppnum_t prevcontaddr = 0;
5101 ppnum_t start_pnum = 0;
5102 unsigned int npages = 0, considered = 0, scanned = 0;
5103 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5104 unsigned int idx_last_contig_page_found = 0;
5105 int free_considered = 0, free_available = 0;
5106 int substitute_needed = 0;
5107 boolean_t wrapped, zone_gc_called = FALSE;
5108 kern_return_t kr;
5109 #if DEBUG
5110 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5111 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5112 #endif
5113
5114 int yielded = 0;
5115 int dumped_run = 0;
5116 int stolen_pages = 0;
5117 int compressed_pages = 0;
5118
5119
5120 if (contig_pages == 0)
5121 return VM_PAGE_NULL;
5122
5123 full_scan_again:
5124
5125 #if MACH_ASSERT
5126 vm_page_verify_free_lists();
5127 #endif
5128 #if DEBUG
5129 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5130 #endif
5131 PAGE_REPLACEMENT_ALLOWED(TRUE);
5132
5133 vm_page_lock_queues();
5134
5135 #if __arm64__
5136 if (preallocated_buffer_available) {
5137
5138 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5139
5140 m = cpm_preallocated_pages_list;
5141
5142 start_idx = (unsigned int) (m - &vm_pages[0]);
5143
5144 if (wire == FALSE) {
5145
5146 last_idx = start_idx;
5147
5148 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5149
5150 assert(vm_pages[last_idx].gobbled == FALSE);
5151
5152 vm_pages[last_idx].gobbled = TRUE;
5153 vm_page_gobble_count++;
5154
5155 assert(1 == vm_pages[last_idx].wire_count);
5156 /*
5157 * Gobbled pages are counted as wired pages. So no need to drop
5158 * the global wired page count. Just the page's wire count is fine.
5159 */
5160 vm_pages[last_idx].wire_count--;
5161 vm_pages[last_idx].vm_page_q_state = VM_PAGE_NOT_ON_Q;
5162 }
5163
5164 }
5165
5166 last_idx = start_idx + contig_pages - 1;
5167
5168 vm_pages[last_idx].snext = NULL;
5169
5170 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5171
5172 last_idx += 1;
5173 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5174
5175 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5176 vm_page_free(&vm_pages[last_idx]);
5177 }
5178
5179 cpm_preallocated_pages_list = NULL;
5180 preallocated_buffer_available = FALSE;
5181
5182 goto done_scanning;
5183 }
5184 }
5185 #endif /* __arm64__ */
5186
5187 lck_mtx_lock(&vm_page_queue_free_lock);
5188
5189 RESET_STATE_OF_RUN();
5190
5191 scanned = 0;
5192 considered = 0;
5193 free_available = vm_page_free_count - vm_page_free_reserved;
5194
5195 wrapped = FALSE;
5196
5197 if(flags & KMA_LOMEM)
5198 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5199 else
5200 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5201
5202 orig_last_idx = idx_last_contig_page_found;
5203 last_idx = orig_last_idx;
5204
5205 for (page_idx = last_idx, start_idx = last_idx;
5206 npages < contig_pages && page_idx < vm_pages_count;
5207 page_idx++) {
5208 retry:
5209 if (wrapped &&
5210 npages == 0 &&
5211 page_idx >= orig_last_idx) {
5212 /*
5213 * We're back where we started and we haven't
5214 * found any suitable contiguous range. Let's
5215 * give up.
5216 */
5217 break;
5218 }
5219 scanned++;
5220 m = &vm_pages[page_idx];
5221
5222 assert(!m->fictitious);
5223 assert(!m->private);
5224
5225 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5226 /* no more low pages... */
5227 break;
5228 }
5229 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5230 /*
5231 * not aligned
5232 */
5233 RESET_STATE_OF_RUN();
5234
5235 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
5236 m->laundry || m->wanted ||
5237 m->cleaning || m->overwriting || m->free_when_done) {
5238 /*
5239 * page is in a transient state
5240 * or a state we don't want to deal
5241 * with, so don't consider it which
5242 * means starting a new run
5243 */
5244 RESET_STATE_OF_RUN();
5245
5246 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
5247 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5248 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5249 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5250 /*
5251 * page needs to be on one of our queues (other than the pageout or special free queues)
5252 * or it needs to belong to the compressor pool (which is now indicated
5253 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5254 * from the check for VM_PAGE_NOT_ON_Q)
5255 * in order for it to be stable behind the
5256 * locks we hold at this point...
5257 * if not, don't consider it which
5258 * means starting a new run
5259 */
5260 RESET_STATE_OF_RUN();
5261
5262 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
5263 /*
5264 * pages on the free list are always 'busy'
5265 * so we couldn't test for 'busy' in the check
5266 * for the transient states... pages that are
5267 * 'free' are never 'tabled', so we also couldn't
5268 * test for 'tabled'. So we check here to make
5269 * sure that a non-free page is not busy and is
5270 * tabled on an object...
5271 * if not, don't consider it which
5272 * means starting a new run
5273 */
5274 RESET_STATE_OF_RUN();
5275
5276 } else {
5277 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5278 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5279 RESET_STATE_OF_RUN();
5280 goto did_consider;
5281 } else {
5282 npages = 1;
5283 start_idx = page_idx;
5284 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5285 }
5286 } else {
5287 npages++;
5288 }
5289 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5290
5291 VM_PAGE_CHECK(m);
5292 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5293 free_considered++;
5294 } else {
5295 /*
5296 * This page is not free.
5297 * If we can't steal used pages,
5298 * we have to give up this run
5299 * and keep looking.
5300 * Otherwise, we might need to
5301 * move the contents of this page
5302 * into a substitute page.
5303 */
5304 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5305 if (m->pmapped || m->dirty || m->precious) {
5306 substitute_needed++;
5307 }
5308 #else
5309 RESET_STATE_OF_RUN();
5310 #endif
5311 }
5312
5313 if ((free_considered + substitute_needed) > free_available) {
5314 /*
5315 * if we let this run continue
5316 * we will end up dropping the vm_page_free_count
5317 * below the reserve limit... we need to abort
5318 * this run, but we can at least re-consider this
5319 * page... thus the jump back to 'retry'
5320 */
5321 RESET_STATE_OF_RUN();
5322
5323 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5324 considered++;
5325 goto retry;
5326 }
5327 /*
5328 * free_available == 0
5329 * so can't consider any free pages... if
5330 * we went to retry in this case, we'd
5331 * get stuck looking at the same page
5332 * w/o making any forward progress...
5333 * we also want to take this path if we've already
5334 * reached our limit that controls the lock latency
5335 */
5336 }
5337 }
5338 did_consider:
5339 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5340
5341 PAGE_REPLACEMENT_ALLOWED(FALSE);
5342
5343 lck_mtx_unlock(&vm_page_queue_free_lock);
5344 vm_page_unlock_queues();
5345
5346 mutex_pause(0);
5347
5348 PAGE_REPLACEMENT_ALLOWED(TRUE);
5349
5350 vm_page_lock_queues();
5351 lck_mtx_lock(&vm_page_queue_free_lock);
5352
5353 RESET_STATE_OF_RUN();
5354 /*
5355 * reset our free page limit since we
5356 * dropped the lock protecting the vm_page_free_queue
5357 */
5358 free_available = vm_page_free_count - vm_page_free_reserved;
5359 considered = 0;
5360
5361 yielded++;
5362
5363 goto retry;
5364 }
5365 considered++;
5366 }
5367 m = VM_PAGE_NULL;
5368
5369 if (npages != contig_pages) {
5370 if (!wrapped) {
5371 /*
5372 * We didn't find a contiguous range but we didn't
5373 * start from the very first page.
5374 * Start again from the very first page.
5375 */
5376 RESET_STATE_OF_RUN();
5377 if( flags & KMA_LOMEM)
5378 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5379 else
5380 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5381 last_idx = 0;
5382 page_idx = last_idx;
5383 wrapped = TRUE;
5384 goto retry;
5385 }
5386 lck_mtx_unlock(&vm_page_queue_free_lock);
5387 } else {
5388 vm_page_t m1;
5389 vm_page_t m2;
5390 unsigned int cur_idx;
5391 unsigned int tmp_start_idx;
5392 vm_object_t locked_object = VM_OBJECT_NULL;
5393 boolean_t abort_run = FALSE;
5394
5395 assert(page_idx - start_idx == contig_pages);
5396
5397 tmp_start_idx = start_idx;
5398
5399 /*
5400 * first pass through to pull the free pages
5401 * off of the free queue so that in case we
5402 * need substitute pages, we won't grab any
5403 * of the free pages in the run... we'll clear
5404 * the 'free' bit in the 2nd pass, and even in
5405 * an abort_run case, we'll collect all of the
5406 * free pages in this run and return them to the free list
5407 */
5408 while (start_idx < page_idx) {
5409
5410 m1 = &vm_pages[start_idx++];
5411
5412 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5413 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
5414 #endif
5415
5416 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5417 unsigned int color;
5418
5419 color = VM_PAGE_GET_COLOR(m1);
5420 #if MACH_ASSERT
5421 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5422 #endif
5423 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5424 m1,
5425 vm_page_t,
5426 pageq);
5427
5428 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5429 #if MACH_ASSERT
5430 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5431 #endif
5432 /*
5433 * Clear the "free" bit so that this page
5434 * does not get considered for another
5435 * concurrent physically-contiguous allocation.
5436 */
5437 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
5438 assert(m1->busy);
5439
5440 vm_page_free_count--;
5441 }
5442 }
5443 if( flags & KMA_LOMEM)
5444 vm_page_lomem_find_contiguous_last_idx = page_idx;
5445 else
5446 vm_page_find_contiguous_last_idx = page_idx;
5447
5448 /*
5449 * we can drop the free queue lock at this point since
5450 * we've pulled any 'free' candidates off of the list...
5451 * we need it dropped so that we can do a vm_page_grab
5452 * when substituting for pmapped/dirty pages
5453 */
5454 lck_mtx_unlock(&vm_page_queue_free_lock);
5455
5456 start_idx = tmp_start_idx;
5457 cur_idx = page_idx - 1;
5458
5459 while (start_idx++ < page_idx) {
5460 /*
5461 * must go through the list from back to front
5462 * so that the page list is created in the
5463 * correct order - low -> high phys addresses
5464 */
5465 m1 = &vm_pages[cur_idx--];
5466
5467 if (m1->vm_page_object == 0) {
5468 /*
5469 * page has already been removed from
5470 * the free list in the 1st pass
5471 */
5472 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5473 assert(m1->offset == (vm_object_offset_t) -1);
5474 assert(m1->busy);
5475 assert(!m1->wanted);
5476 assert(!m1->laundry);
5477 } else {
5478 vm_object_t object;
5479 int refmod;
5480 boolean_t disconnected, reusable;
5481
5482 if (abort_run == TRUE)
5483 continue;
5484
5485 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5486
5487 object = VM_PAGE_OBJECT(m1);
5488
5489 if (object != locked_object) {
5490 if (locked_object) {
5491 vm_object_unlock(locked_object);
5492 locked_object = VM_OBJECT_NULL;
5493 }
5494 if (vm_object_lock_try(object))
5495 locked_object = object;
5496 }
5497 if (locked_object == VM_OBJECT_NULL ||
5498 (VM_PAGE_WIRED(m1) || m1->gobbled ||
5499 m1->laundry || m1->wanted ||
5500 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5501 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5502
5503 if (locked_object) {
5504 vm_object_unlock(locked_object);
5505 locked_object = VM_OBJECT_NULL;
5506 }
5507 tmp_start_idx = cur_idx;
5508 abort_run = TRUE;
5509 continue;
5510 }
5511
5512 disconnected = FALSE;
5513 reusable = FALSE;
5514
5515 if ((m1->reusable ||
5516 object->all_reusable) &&
5517 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5518 !m1->dirty &&
5519 !m1->reference) {
5520 /* reusable page... */
5521 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5522 disconnected = TRUE;
5523 if (refmod == 0) {
5524 /*
5525 * ... not reused: can steal
5526 * without relocating contents.
5527 */
5528 reusable = TRUE;
5529 }
5530 }
5531
5532 if ((m1->pmapped &&
5533 ! reusable) ||
5534 m1->dirty ||
5535 m1->precious) {
5536 vm_object_offset_t offset;
5537
5538 m2 = vm_page_grab();
5539
5540 if (m2 == VM_PAGE_NULL) {
5541 if (locked_object) {
5542 vm_object_unlock(locked_object);
5543 locked_object = VM_OBJECT_NULL;
5544 }
5545 tmp_start_idx = cur_idx;
5546 abort_run = TRUE;
5547 continue;
5548 }
5549 if (! disconnected) {
5550 if (m1->pmapped)
5551 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5552 else
5553 refmod = 0;
5554 }
5555
5556 /* copy the page's contents */
5557 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5558 /* copy the page's state */
5559 assert(!VM_PAGE_WIRED(m1));
5560 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5561 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
5562 assert(!m1->laundry);
5563 m2->reference = m1->reference;
5564 assert(!m1->gobbled);
5565 assert(!m1->private);
5566 m2->no_cache = m1->no_cache;
5567 m2->xpmapped = 0;
5568 assert(!m1->busy);
5569 assert(!m1->wanted);
5570 assert(!m1->fictitious);
5571 m2->pmapped = m1->pmapped; /* should flush cache ? */
5572 m2->wpmapped = m1->wpmapped;
5573 assert(!m1->free_when_done);
5574 m2->absent = m1->absent;
5575 m2->error = m1->error;
5576 m2->dirty = m1->dirty;
5577 assert(!m1->cleaning);
5578 m2->precious = m1->precious;
5579 m2->clustered = m1->clustered;
5580 assert(!m1->overwriting);
5581 m2->restart = m1->restart;
5582 m2->unusual = m1->unusual;
5583 m2->cs_validated = m1->cs_validated;
5584 m2->cs_tainted = m1->cs_tainted;
5585 m2->cs_nx = m1->cs_nx;
5586
5587 /*
5588 * If m1 had really been reusable,
5589 * we would have just stolen it, so
5590 * let's not propagate its "reusable"
5591 * bit and assert that m2 is not
5592 * marked as "reusable".
5593 */
5594 // m2->reusable = m1->reusable;
5595 assert(!m2->reusable);
5596
5597 // assert(!m1->lopage);
5598 m2->slid = m1->slid;
5599
5600 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5601 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5602
5603 /*
5604 * page may need to be flushed if
5605 * it is marshalled into a UPL
5606 * that is going to be used by a device
5607 * that doesn't support coherency
5608 */
5609 m2->written_by_kernel = TRUE;
5610
5611 /*
5612 * make sure we clear the ref/mod state
5613 * from the pmap layer... else we risk
5614 * inheriting state from the last time
5615 * this page was used...
5616 */
5617 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5618
5619 if (refmod & VM_MEM_REFERENCED)
5620 m2->reference = TRUE;
5621 if (refmod & VM_MEM_MODIFIED) {
5622 SET_PAGE_DIRTY(m2, TRUE);
5623 }
5624 offset = m1->offset;
5625
5626 /*
5627 * completely cleans up the state
5628 * of the page so that it is ready
5629 * to be put onto the free list; for
5630 * our purposes it looks like it
5631 * just came off of the free list
5632 */
5633 vm_page_free_prepare(m1);
5634
5635 /*
5636 * now put the substitute page
5637 * on the object
5638 */
5639 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5640
5641 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5642 m2->pmapped = TRUE;
5643 m2->wpmapped = TRUE;
5644
5645 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5646 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5647
5648 assert(kr == KERN_SUCCESS);
5649
5650 compressed_pages++;
5651
5652 } else {
5653 if (m2->reference)
5654 vm_page_activate(m2);
5655 else
5656 vm_page_deactivate(m2);
5657 }
5658 PAGE_WAKEUP_DONE(m2);
5659
5660 } else {
5661 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5662
5663 /*
5664 * completely cleans up the state
5665 * of the page so that it is ready
5666 * to be put onto the free list; for
5667 * our purposes it looks like it
5668 * just came off of the free list
5669 */
5670 vm_page_free_prepare(m1);
5671 }
5672
5673 stolen_pages++;
5674
5675 }
5676 #if CONFIG_BACKGROUND_QUEUE
5677 vm_page_assign_background_state(m1);
5678 #endif
5679 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5680 m1->snext = m;
5681 m = m1;
5682 }
5683 if (locked_object) {
5684 vm_object_unlock(locked_object);
5685 locked_object = VM_OBJECT_NULL;
5686 }
5687
5688 if (abort_run == TRUE) {
5689 /*
5690 * want the index of the last
5691 * page in this run that was
5692 * successfully 'stolen', so back
5693 * it up 1 for the auto-decrement on use
5694 * and 1 more to bump back over this page
5695 */
5696 page_idx = tmp_start_idx + 2;
5697 if (page_idx >= vm_pages_count) {
5698 if (wrapped) {
5699 if (m != VM_PAGE_NULL) {
5700 vm_page_unlock_queues();
5701 vm_page_free_list(m, FALSE);
5702 vm_page_lock_queues();
5703 m = VM_PAGE_NULL;
5704 }
5705 dumped_run++;
5706 goto done_scanning;
5707 }
5708 page_idx = last_idx = 0;
5709 wrapped = TRUE;
5710 }
5711 abort_run = FALSE;
5712
5713 /*
5714 * We didn't find a contiguous range but we didn't
5715 * start from the very first page.
5716 * Start again from the very first page.
5717 */
5718 RESET_STATE_OF_RUN();
5719
5720 if( flags & KMA_LOMEM)
5721 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5722 else
5723 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5724
5725 last_idx = page_idx;
5726
5727 if (m != VM_PAGE_NULL) {
5728 vm_page_unlock_queues();
5729 vm_page_free_list(m, FALSE);
5730 vm_page_lock_queues();
5731 m = VM_PAGE_NULL;
5732 }
5733 dumped_run++;
5734
5735 lck_mtx_lock(&vm_page_queue_free_lock);
5736 /*
5737 * reset our free page limit since we
5738 * dropped the lock protecting the vm_page_free_queue
5739 */
5740 free_available = vm_page_free_count - vm_page_free_reserved;
5741 goto retry;
5742 }
5743
5744 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5745
5746 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5747 assert(m1->wire_count == 0);
5748
5749 if (wire == TRUE) {
5750 m1->wire_count++;
5751 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5752 } else
5753 m1->gobbled = TRUE;
5754 }
5755 if (wire == FALSE)
5756 vm_page_gobble_count += npages;
5757
5758 /*
5759 * gobbled pages are also counted as wired pages
5760 */
5761 vm_page_wire_count += npages;
5762
5763 assert(vm_page_verify_contiguous(m, npages));
5764 }
5765 done_scanning:
5766 PAGE_REPLACEMENT_ALLOWED(FALSE);
5767
5768 vm_page_unlock_queues();
5769
5770 #if DEBUG
5771 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5772
5773 tv_end_sec -= tv_start_sec;
5774 if (tv_end_usec < tv_start_usec) {
5775 tv_end_sec--;
5776 tv_end_usec += 1000000;
5777 }
5778 tv_end_usec -= tv_start_usec;
5779 if (tv_end_usec >= 1000000) {
5780 tv_end_sec++;
5781 tv_end_usec -= 1000000;
5782 }
5783 if (vm_page_find_contig_debug) {
5784 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5785 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5786 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5787 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5788 }
5789
5790 #endif
5791 #if MACH_ASSERT
5792 vm_page_verify_free_lists();
5793 #endif
5794 if (m == NULL && zone_gc_called == FALSE) {
5795 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5796 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5797 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5798
5799 if (consider_buffer_cache_collect != NULL) {
5800 (void)(*consider_buffer_cache_collect)(1);
5801 }
5802
5803 consider_zone_gc(FALSE);
5804
5805 zone_gc_called = TRUE;
5806
5807 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5808 goto full_scan_again;
5809 }
5810
5811 return m;
5812 }
5813
5814 /*
5815 * Allocate a list of contiguous, wired pages.
5816 */
5817 kern_return_t
5818 cpm_allocate(
5819 vm_size_t size,
5820 vm_page_t *list,
5821 ppnum_t max_pnum,
5822 ppnum_t pnum_mask,
5823 boolean_t wire,
5824 int flags)
5825 {
5826 vm_page_t pages;
5827 unsigned int npages;
5828
5829 if (size % PAGE_SIZE != 0)
5830 return KERN_INVALID_ARGUMENT;
5831
5832 npages = (unsigned int) (size / PAGE_SIZE);
5833 if (npages != size / PAGE_SIZE) {
5834 /* 32-bit overflow */
5835 return KERN_INVALID_ARGUMENT;
5836 }
5837
5838 /*
5839 * Obtain a pointer to a subset of the free
5840 * list large enough to satisfy the request;
5841 * the region will be physically contiguous.
5842 */
5843 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
5844
5845 if (pages == VM_PAGE_NULL)
5846 return KERN_NO_SPACE;
5847 /*
5848 * determine need for wakeups
5849 */
5850 if ((vm_page_free_count < vm_page_free_min) ||
5851 ((vm_page_free_count < vm_page_free_target) &&
5852 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5853 thread_wakeup((event_t) &vm_page_free_wanted);
5854
5855 VM_CHECK_MEMORYSTATUS;
5856
5857 /*
5858 * The CPM pages should now be available and
5859 * ordered by ascending physical address.
5860 */
5861 assert(vm_page_verify_contiguous(pages, npages));
5862
5863 *list = pages;
5864 return KERN_SUCCESS;
5865 }
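
/*
 * Illustrative sketch of a cpm_allocate() caller (hypothetical function,
 * not part of this file): request a physically contiguous, wired run of
 * pages below max_pnum and walk the returned list.  The pnum_mask and
 * flags values are placeholders chosen for the example.
 */
#if 0
static kern_return_t
cpm_allocate_example(vm_size_t size, ppnum_t max_pnum)
{
	vm_page_t	pages;
	vm_page_t	m;
	kern_return_t	kr;

	kr = cpm_allocate(size, &pages, max_pnum, 0 /* pnum_mask */,
			  TRUE /* wire */, 0 /* flags */);
	if (kr != KERN_SUCCESS)
		return (kr);

	/* the list is ordered by ascending physical address */
	for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
		(void) VM_PAGE_GET_PHYS_PAGE(m);

	return (KERN_SUCCESS);
}
#endif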
5866
5867
5868 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5869
5870 /*
5871 * when working on a 'run' of pages, it is necessary to hold
5872 * the vm_page_queue_lock (a hot global lock) for certain operations
5873 * on the page... however, the majority of the work can be done
5874 * while merely holding the object lock... in fact there are certain
5875 * collections of pages that don't require any work brokered by the
5876 * vm_page_queue_lock... to mitigate the time spent behind the global
5877 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5878 * while doing all of the work that doesn't require the vm_page_queue_lock...
5879 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5880 * necessary work for each page... we will grab the busy bit on the page
5881 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5882 * if it can't immediately take the vm_page_queue_lock in order to compete
5883 * for the locks in the same order that vm_pageout_scan takes them.
5884 * the operation names are modeled after the names of the routines that
5885 * need to be called in order to make the changes very obvious in the
5886 * original loop
5887 */
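
/*
 * A minimal sketch of the 2 pass pattern described above (hypothetical
 * helper, not part of this file): the caller, holding only the object
 * lock, records the queue-lock work for each page it touches, then makes
 * a single vm_page_do_delayed_work call for the whole batch.  A real
 * caller accumulates many pages and flushes whenever the batch limit is
 * reached; the mask shown is just one possible combination.
 */
#if 0
static void
delayed_work_example(vm_object_t object, vm_page_t m)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;

	/* pass 1: object lock only... note what this page needs */
	dwp->dw_m = m;
	dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
	dwp++;
	dw_count++;

	/* pass 2: one trip behind the vm_page_queue_lock for the batch */
	vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
}
#endif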
5888
5889 void
5890 vm_page_do_delayed_work(
5891 vm_object_t object,
5892 vm_tag_t tag,
5893 struct vm_page_delayed_work *dwp,
5894 int dw_count)
5895 {
5896 int j;
5897 vm_page_t m;
5898 vm_page_t local_free_q = VM_PAGE_NULL;
5899
5900 /*
5901 * pageout_scan takes the vm_page_lock_queues first
5902 * then tries for the object lock... to avoid what
5903 * is effectively a lock inversion, we'll go to the
5904 * trouble of taking them in that same order... otherwise
5905 * if this object contains the majority of the pages resident
5906 * in the UBC (or a small set of large objects actively being
5907 * worked on contain the majority of the pages), we could
5908 * cause the pageout_scan thread to 'starve' in its attempt
5909 * to find pages to move to the free queue, since it has to
5910 * successfully acquire the object lock of any candidate page
5911 * before it can steal/clean it.
5912 */
5913 if (!vm_page_trylockspin_queues()) {
5914 vm_object_unlock(object);
5915
5916 vm_page_lockspin_queues();
5917
5918 for (j = 0; ; j++) {
5919 if (!vm_object_lock_avoid(object) &&
5920 _vm_object_lock_try(object))
5921 break;
5922 vm_page_unlock_queues();
5923 mutex_pause(j);
5924 vm_page_lockspin_queues();
5925 }
5926 }
5927 for (j = 0; j < dw_count; j++, dwp++) {
5928
5929 m = dwp->dw_m;
5930
5931 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
5932 vm_pageout_throttle_up(m);
5933 #if CONFIG_PHANTOM_CACHE
5934 if (dwp->dw_mask & DW_vm_phantom_cache_update)
5935 vm_phantom_cache_update(m);
5936 #endif
5937 if (dwp->dw_mask & DW_vm_page_wire)
5938 vm_page_wire(m, tag, FALSE);
5939 else if (dwp->dw_mask & DW_vm_page_unwire) {
5940 boolean_t queueit;
5941
5942 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
5943
5944 vm_page_unwire(m, queueit);
5945 }
5946 if (dwp->dw_mask & DW_vm_page_free) {
5947 vm_page_free_prepare_queues(m);
5948
5949 assert(m->pageq.next == 0 && m->pageq.prev == 0);
5950 /*
5951 * Add this page to our list of reclaimed pages,
5952 * to be freed later.
5953 */
5954 m->snext = local_free_q;
5955 local_free_q = m;
5956 } else {
5957 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5958 vm_page_deactivate_internal(m, FALSE);
5959 else if (dwp->dw_mask & DW_vm_page_activate) {
5960 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
5961 vm_page_activate(m);
5962 }
5963 }
5964 else if (dwp->dw_mask & DW_vm_page_speculate)
5965 vm_page_speculate(m, TRUE);
5966 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5967 /*
5968 * if we didn't hold the object lock and did this,
5969 * we might disconnect the page, then someone might
5970 * soft fault it back in, then we would put it on the
5971 * cleaned queue, and so we would have a referenced (maybe even dirty)
5972 * page on that queue, which we don't want
5973 */
5974 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5975
5976 if ((refmod_state & VM_MEM_REFERENCED)) {
5977 /*
5978 * this page has been touched since it got cleaned; let's activate it
5979 * if it hasn't already been
5980 */
5981 vm_pageout_enqueued_cleaned++;
5982 vm_pageout_cleaned_reactivated++;
5983 vm_pageout_cleaned_commit_reactivated++;
5984
5985 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
5986 vm_page_activate(m);
5987 } else {
5988 m->reference = FALSE;
5989 vm_page_enqueue_cleaned(m);
5990 }
5991 }
5992 else if (dwp->dw_mask & DW_vm_page_lru)
5993 vm_page_lru(m);
5994 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5995 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
5996 vm_page_queues_remove(m, TRUE);
5997 }
5998 if (dwp->dw_mask & DW_set_reference)
5999 m->reference = TRUE;
6000 else if (dwp->dw_mask & DW_clear_reference)
6001 m->reference = FALSE;
6002
6003 if (dwp->dw_mask & DW_move_page) {
6004 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6005 vm_page_queues_remove(m, FALSE);
6006
6007 assert(VM_PAGE_OBJECT(m) != kernel_object);
6008
6009 vm_page_enqueue_inactive(m, FALSE);
6010 }
6011 }
6012 if (dwp->dw_mask & DW_clear_busy)
6013 m->busy = FALSE;
6014
6015 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6016 PAGE_WAKEUP(m);
6017 }
6018 }
6019 vm_page_unlock_queues();
6020
6021 if (local_free_q)
6022 vm_page_free_list(local_free_q, TRUE);
6023
6024 VM_CHECK_MEMORYSTATUS;
6025
6026 }
6027
6028 kern_return_t
6029 vm_page_alloc_list(
6030 int page_count,
6031 int flags,
6032 vm_page_t *list)
6033 {
6034 vm_page_t lo_page_list = VM_PAGE_NULL;
6035 vm_page_t mem;
6036 int i;
6037
6038 if ( !(flags & KMA_LOMEM))
6039 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6040
6041 for (i = 0; i < page_count; i++) {
6042
6043 mem = vm_page_grablo();
6044
6045 if (mem == VM_PAGE_NULL) {
6046 if (lo_page_list)
6047 vm_page_free_list(lo_page_list, FALSE);
6048
6049 *list = VM_PAGE_NULL;
6050
6051 return (KERN_RESOURCE_SHORTAGE);
6052 }
6053 mem->snext = lo_page_list;
6054 lo_page_list = mem;
6055 }
6056 *list = lo_page_list;
6057
6058 return (KERN_SUCCESS);
6059 }
6060
6061 void
6062 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6063 {
6064 page->offset = offset;
6065 }
6066
6067 vm_page_t
6068 vm_page_get_next(vm_page_t page)
6069 {
6070 return (page->snext);
6071 }
6072
6073 vm_object_offset_t
6074 vm_page_get_offset(vm_page_t page)
6075 {
6076 return (page->offset);
6077 }
6078
6079 ppnum_t
6080 vm_page_get_phys_page(vm_page_t page)
6081 {
6082 return (VM_PAGE_GET_PHYS_PAGE(page));
6083 }
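
/*
 * Illustrative sketch (hypothetical function, not part of this file):
 * allocate a list of low-memory pages with vm_page_alloc_list and walk
 * it using the accessors above.
 */
#if 0
static kern_return_t
lopage_list_example(int page_count)
{
	vm_page_t	list;
	vm_page_t	m;
	kern_return_t	kr;

	kr = vm_page_alloc_list(page_count, KMA_LOMEM, &list);
	if (kr != KERN_SUCCESS)
		return (kr);

	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		(void) vm_page_get_phys_page(m);

	/* return the pages to the free list when done with them */
	vm_page_free_list(list, FALSE);

	return (KERN_SUCCESS);
}
#endif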
6084
6085
6086 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6087
6088 #if HIBERNATION
6089
6090 static vm_page_t hibernate_gobble_queue;
6091
6092 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6093 static int hibernate_flush_dirty_pages(int);
6094 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6095
6096 void hibernate_flush_wait(void);
6097 void hibernate_mark_in_progress(void);
6098 void hibernate_clear_in_progress(void);
6099
6100 void hibernate_free_range(int, int);
6101 void hibernate_hash_insert_page(vm_page_t);
6102 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6103 void hibernate_rebuild_vm_structs(void);
6104 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6105 ppnum_t hibernate_lookup_paddr(unsigned int);
6106
6107 struct hibernate_statistics {
6108 int hibernate_considered;
6109 int hibernate_reentered_on_q;
6110 int hibernate_found_dirty;
6111 int hibernate_skipped_cleaning;
6112 int hibernate_skipped_transient;
6113 int hibernate_skipped_precious;
6114 int hibernate_skipped_external;
6115 int hibernate_queue_nolock;
6116 int hibernate_queue_paused;
6117 int hibernate_throttled;
6118 int hibernate_throttle_timeout;
6119 int hibernate_drained;
6120 int hibernate_drain_timeout;
6121 int cd_lock_failed;
6122 int cd_found_precious;
6123 int cd_found_wired;
6124 int cd_found_busy;
6125 int cd_found_unusual;
6126 int cd_found_cleaning;
6127 int cd_found_laundry;
6128 int cd_found_dirty;
6129 int cd_found_xpmapped;
6130 int cd_skipped_xpmapped;
6131 int cd_local_free;
6132 int cd_total_free;
6133 int cd_vm_page_wire_count;
6134 int cd_vm_struct_pages_unneeded;
6135 int cd_pages;
6136 int cd_discarded;
6137 int cd_count_wire;
6138 } hibernate_stats;
6139
6140
6141 /*
6142 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6143 * so that we don't overrun the estimated image size, which would
6144 * result in a hibernation failure.
6145 */
6146 #define HIBERNATE_XPMAPPED_LIMIT 40000
6147
6148
6149 static int
6150 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6151 {
6152 wait_result_t wait_result;
6153
6154 vm_page_lock_queues();
6155
6156 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6157
6158 q->pgo_draining = TRUE;
6159
6160 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6161
6162 vm_page_unlock_queues();
6163
6164 wait_result = thread_block(THREAD_CONTINUE_NULL);
6165
6166 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6167 hibernate_stats.hibernate_drain_timeout++;
6168
6169 if (q == &vm_pageout_queue_external)
6170 return (0);
6171
6172 return (1);
6173 }
6174 vm_page_lock_queues();
6175
6176 hibernate_stats.hibernate_drained++;
6177 }
6178 vm_page_unlock_queues();
6179
6180 return (0);
6181 }
6182
6183
6184 boolean_t hibernate_skip_external = FALSE;
6185
6186 static int
6187 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6188 {
6189 vm_page_t m;
6190 vm_object_t l_object = NULL;
6191 vm_object_t m_object = NULL;
6192 int refmod_state = 0;
6193 int try_failed_count = 0;
6194 int retval = 0;
6195 int current_run = 0;
6196 struct vm_pageout_queue *iq;
6197 struct vm_pageout_queue *eq;
6198 struct vm_pageout_queue *tq;
6199
6200 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6201 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6202
6203 iq = &vm_pageout_queue_internal;
6204 eq = &vm_pageout_queue_external;
6205
6206 vm_page_lock_queues();
6207
6208 while (qcount && !vm_page_queue_empty(q)) {
6209
6210 if (current_run++ == 1000) {
6211 if (hibernate_should_abort()) {
6212 retval = 1;
6213 break;
6214 }
6215 current_run = 0;
6216 }
6217
6218 m = (vm_page_t) vm_page_queue_first(q);
6219 m_object = VM_PAGE_OBJECT(m);
6220
6221 /*
6222 * check to see if we currently are working
6223 * with the same object... if so, we've
6224 * already got the lock
6225 */
6226 if (m_object != l_object) {
6227 /*
6228 * the object associated with candidate page is
6229 * different from the one we were just working
6230 * with... dump the lock if we still own it
6231 */
6232 if (l_object != NULL) {
6233 vm_object_unlock(l_object);
6234 l_object = NULL;
6235 }
6236 /*
6237 * Try to lock object; since we've already got the
6238 * page queues lock, we can only 'try' for this one.
6239 * if the 'try' fails, we need to do a mutex_pause
6240 * to allow the owner of the object lock a chance to
6241 * run...
6242 */
6243 if ( !vm_object_lock_try_scan(m_object)) {
6244
6245 if (try_failed_count > 20) {
6246 hibernate_stats.hibernate_queue_nolock++;
6247
6248 goto reenter_pg_on_q;
6249 }
6250
6251 vm_page_unlock_queues();
6252 mutex_pause(try_failed_count++);
6253 vm_page_lock_queues();
6254
6255 hibernate_stats.hibernate_queue_paused++;
6256 continue;
6257 } else {
6258 l_object = m_object;
6259 }
6260 }
6261 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
6262 /*
6263 * page is not to be cleaned
6264 * put it back on the head of its queue
6265 */
6266 if (m->cleaning)
6267 hibernate_stats.hibernate_skipped_cleaning++;
6268 else
6269 hibernate_stats.hibernate_skipped_transient++;
6270
6271 goto reenter_pg_on_q;
6272 }
6273 if (m_object->copy == VM_OBJECT_NULL) {
6274 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6275 /*
6276 * let the normal hibernate image path
6277 * deal with these
6278 */
6279 goto reenter_pg_on_q;
6280 }
6281 }
6282 if ( !m->dirty && m->pmapped) {
6283 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6284
6285 if ((refmod_state & VM_MEM_MODIFIED)) {
6286 SET_PAGE_DIRTY(m, FALSE);
6287 }
6288 } else
6289 refmod_state = 0;
6290
6291 if ( !m->dirty) {
6292 /*
6293 * page is not to be cleaned
6294 * put it back on the head of its queue
6295 */
6296 if (m->precious)
6297 hibernate_stats.hibernate_skipped_precious++;
6298
6299 goto reenter_pg_on_q;
6300 }
6301
6302 if (hibernate_skip_external == TRUE && !m_object->internal) {
6303
6304 hibernate_stats.hibernate_skipped_external++;
6305
6306 goto reenter_pg_on_q;
6307 }
6308 tq = NULL;
6309
6310 if (m_object->internal) {
6311 if (VM_PAGE_Q_THROTTLED(iq))
6312 tq = iq;
6313 } else if (VM_PAGE_Q_THROTTLED(eq))
6314 tq = eq;
6315
6316 if (tq != NULL) {
6317 wait_result_t wait_result;
6318 int wait_count = 5;
6319
6320 if (l_object != NULL) {
6321 vm_object_unlock(l_object);
6322 l_object = NULL;
6323 }
6324
6325 while (retval == 0) {
6326
6327 tq->pgo_throttled = TRUE;
6328
6329 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6330
6331 vm_page_unlock_queues();
6332
6333 wait_result = thread_block(THREAD_CONTINUE_NULL);
6334
6335 vm_page_lock_queues();
6336
6337 if (wait_result != THREAD_TIMED_OUT)
6338 break;
6339 if (!VM_PAGE_Q_THROTTLED(tq))
6340 break;
6341
6342 if (hibernate_should_abort())
6343 retval = 1;
6344
6345 if (--wait_count == 0) {
6346
6347 hibernate_stats.hibernate_throttle_timeout++;
6348
6349 if (tq == eq) {
6350 hibernate_skip_external = TRUE;
6351 break;
6352 }
6353 retval = 1;
6354 }
6355 }
6356 if (retval)
6357 break;
6358
6359 hibernate_stats.hibernate_throttled++;
6360
6361 continue;
6362 }
6363 /*
6364 * we've already factored out pages in the laundry which
6365 * means this page can't be on the pageout queue so it's
6366 * safe to do the vm_page_queues_remove
6367 */
6368 vm_page_queues_remove(m, TRUE);
6369
6370 if (m_object->internal == TRUE)
6371 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6372
6373 vm_pageout_cluster(m);
6374
6375 hibernate_stats.hibernate_found_dirty++;
6376
6377 goto next_pg;
6378
6379 reenter_pg_on_q:
6380 vm_page_queue_remove(q, m, vm_page_t, pageq);
6381 vm_page_queue_enter(q, m, vm_page_t, pageq);
6382
6383 hibernate_stats.hibernate_reentered_on_q++;
6384 next_pg:
6385 hibernate_stats.hibernate_considered++;
6386
6387 qcount--;
6388 try_failed_count = 0;
6389 }
6390 if (l_object != NULL) {
6391 vm_object_unlock(l_object);
6392 l_object = NULL;
6393 }
6394
6395 vm_page_unlock_queues();
6396
6397 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6398
6399 return (retval);
6400 }
6401
6402
6403 static int
6404 hibernate_flush_dirty_pages(int pass)
6405 {
6406 struct vm_speculative_age_q *aq;
6407 uint32_t i;
6408
6409 if (vm_page_local_q) {
6410 for (i = 0; i < vm_page_local_q_count; i++)
6411 vm_page_reactivate_local(i, TRUE, FALSE);
6412 }
6413
6414 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6415 int qcount;
6416 vm_page_t m;
6417
6418 aq = &vm_page_queue_speculative[i];
6419
6420 if (vm_page_queue_empty(&aq->age_q))
6421 continue;
6422 qcount = 0;
6423
6424 vm_page_lockspin_queues();
6425
6426 vm_page_queue_iterate(&aq->age_q,
6427 m,
6428 vm_page_t,
6429 pageq)
6430 {
6431 qcount++;
6432 }
6433 vm_page_unlock_queues();
6434
6435 if (qcount) {
6436 if (hibernate_flush_queue(&aq->age_q, qcount))
6437 return (1);
6438 }
6439 }
6440 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6441 return (1);
6442 /* XXX FBDP TODO: flush secluded queue */
6443 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6444 return (1);
6445 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6446 return (1);
6447 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6448 return (1);
6449
6450 if (pass == 1)
6451 vm_compressor_record_warmup_start();
6452
6453 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6454 if (pass == 1)
6455 vm_compressor_record_warmup_end();
6456 return (1);
6457 }
6458 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6459 if (pass == 1)
6460 vm_compressor_record_warmup_end();
6461 return (1);
6462 }
6463 if (pass == 1)
6464 vm_compressor_record_warmup_end();
6465
6466 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6467 return (1);
6468
6469 return (0);
6470 }
6471
6472
6473 void
6474 hibernate_reset_stats()
6475 {
6476 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6477 }
6478
6479
6480 int
6481 hibernate_flush_memory()
6482 {
6483 int retval;
6484
6485 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6486
6487 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6488
6489 hibernate_cleaning_in_progress = TRUE;
6490 hibernate_skip_external = FALSE;
6491
6492 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6493
6494 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6495
6496 vm_compressor_flush();
6497
6498 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6499
6500 if (consider_buffer_cache_collect != NULL) {
6501 unsigned int orig_wire_count;
6502
6503 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6504 orig_wire_count = vm_page_wire_count;
6505
6506 (void)(*consider_buffer_cache_collect)(1);
6507 consider_zone_gc(FALSE);
6508
6509 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6510
6511 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6512 }
6513 }
6514 hibernate_cleaning_in_progress = FALSE;
6515
6516 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6517
6518 if (retval)
6519 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6520
6521
6522 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6523 hibernate_stats.hibernate_considered,
6524 hibernate_stats.hibernate_reentered_on_q,
6525 hibernate_stats.hibernate_found_dirty);
6526 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6527 hibernate_stats.hibernate_skipped_cleaning,
6528 hibernate_stats.hibernate_skipped_transient,
6529 hibernate_stats.hibernate_skipped_precious,
6530 hibernate_stats.hibernate_skipped_external,
6531 hibernate_stats.hibernate_queue_nolock);
6532 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6533 hibernate_stats.hibernate_queue_paused,
6534 hibernate_stats.hibernate_throttled,
6535 hibernate_stats.hibernate_throttle_timeout,
6536 hibernate_stats.hibernate_drained,
6537 hibernate_stats.hibernate_drain_timeout);
6538
6539 return (retval);
6540 }
6541
6542
6543 static void
6544 hibernate_page_list_zero(hibernate_page_list_t *list)
6545 {
6546 uint32_t bank;
6547 hibernate_bitmap_t * bitmap;
6548
6549 bitmap = &list->bank_bitmap[0];
6550 for (bank = 0; bank < list->bank_count; bank++)
6551 {
6552 uint32_t last_bit;
6553
6554 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6555 // set out-of-bound bits at end of bitmap.
6556 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6557 if (last_bit)
6558 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
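// e.g. a bank spanning 70 pages: last_bit = 70 & 31 = 6, so
// 0xFFFFFFFF >> 6 pre-sets the 26 low-order bits of the final word,
// i.e. (given hibernate_page_bitset's MSB-first layout) the positions
// past the bank's last page, so they are never treated as needing save.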
6559
6560 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6561 }
6562 }
6563
6564 void
6565 hibernate_free_gobble_pages(void)
6566 {
6567 vm_page_t m, next;
6568 uint32_t count = 0;
6569
6570 m = (vm_page_t) hibernate_gobble_queue;
6571 while(m)
6572 {
6573 next = m->snext;
6574 vm_page_free(m);
6575 count++;
6576 m = next;
6577 }
6578 hibernate_gobble_queue = VM_PAGE_NULL;
6579
6580 if (count)
6581 HIBLOG("Freed %d pages\n", count);
6582 }
6583
6584 static boolean_t
6585 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6586 {
6587 vm_object_t object = NULL;
6588 int refmod_state;
6589 boolean_t discard = FALSE;
6590
6591 do
6592 {
6593 if (m->private)
6594 panic("hibernate_consider_discard: private");
6595
6596 object = VM_PAGE_OBJECT(m);
6597
6598 if (!vm_object_lock_try(object)) {
6599 object = NULL;
6600 if (!preflight) hibernate_stats.cd_lock_failed++;
6601 break;
6602 }
6603 if (VM_PAGE_WIRED(m)) {
6604 if (!preflight) hibernate_stats.cd_found_wired++;
6605 break;
6606 }
6607 if (m->precious) {
6608 if (!preflight) hibernate_stats.cd_found_precious++;
6609 break;
6610 }
6611 if (m->busy || !object->alive) {
6612 /*
6613 * Somebody is playing with this page.
6614 */
6615 if (!preflight) hibernate_stats.cd_found_busy++;
6616 break;
6617 }
6618 if (m->absent || m->unusual || m->error) {
6619 /*
6620 * If it's unusual in any way, ignore it
6621 */
6622 if (!preflight) hibernate_stats.cd_found_unusual++;
6623 break;
6624 }
6625 if (m->cleaning) {
6626 if (!preflight) hibernate_stats.cd_found_cleaning++;
6627 break;
6628 }
6629 if (m->laundry) {
6630 if (!preflight) hibernate_stats.cd_found_laundry++;
6631 break;
6632 }
6633 if (!m->dirty)
6634 {
6635 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6636
6637 if (refmod_state & VM_MEM_REFERENCED)
6638 m->reference = TRUE;
6639 if (refmod_state & VM_MEM_MODIFIED) {
6640 SET_PAGE_DIRTY(m, FALSE);
6641 }
6642 }
6643
6644 /*
6645 * If it's clean or purgeable we can discard the page on wakeup.
6646 */
6647 discard = (!m->dirty)
6648 || (VM_PURGABLE_VOLATILE == object->purgable)
6649 || (VM_PURGABLE_EMPTY == object->purgable);
6650
6651
6652 if (discard == FALSE) {
6653 if (!preflight)
6654 hibernate_stats.cd_found_dirty++;
6655 } else if (m->xpmapped && m->reference && !object->internal) {
6656 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6657 if (!preflight)
6658 hibernate_stats.cd_found_xpmapped++;
6659 discard = FALSE;
6660 } else {
6661 if (!preflight)
6662 hibernate_stats.cd_skipped_xpmapped++;
6663 }
6664 }
6665 }
6666 while (FALSE);
6667
6668 if (object)
6669 vm_object_unlock(object);
6670
6671 return (discard);
6672 }
6673
6674
6675 static void
6676 hibernate_discard_page(vm_page_t m)
6677 {
6678 vm_object_t m_object;
6679
6680 if (m->absent || m->unusual || m->error)
6681 /*
6682 * If it's unusual in any way, ignore it
6683 */
6684 return;
6685
6686 m_object = VM_PAGE_OBJECT(m);
6687
6688 #if MACH_ASSERT || DEBUG
6689 if (!vm_object_lock_try(m_object))
6690 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6691 #else
6692 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6693 makes sure these locks are uncontended before sleep */
6694 #endif /* MACH_ASSERT || DEBUG */
6695
6696 if (m->pmapped == TRUE)
6697 {
6698 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6699 }
6700
6701 if (m->laundry)
6702 panic("hibernate_discard_page(%p) laundry", m);
6703 if (m->private)
6704 panic("hibernate_discard_page(%p) private", m);
6705 if (m->fictitious)
6706 panic("hibernate_discard_page(%p) fictitious", m);
6707
6708 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6709 {
6710 /* object should be on a queue */
6711 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6712 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6713 assert(old_queue);
6714 if (m_object->purgeable_when_ripe) {
6715 vm_purgeable_token_delete_first(old_queue);
6716 }
6717 vm_object_lock_assert_exclusive(m_object);
6718 m_object->purgable = VM_PURGABLE_EMPTY;
6719
6720 /*
6721 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6722 * accounted in the "volatile" ledger, so no change here.
6723 * We have to update vm_page_purgeable_count, though, since we're
6724 * effectively purging this object.
6725 */
6726 unsigned int delta;
6727 assert(m_object->resident_page_count >= m_object->wired_page_count);
6728 delta = (m_object->resident_page_count - m_object->wired_page_count);
6729 assert(vm_page_purgeable_count >= delta);
6730 assert(delta > 0);
6731 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6732 }
6733
6734 vm_page_free(m);
6735
6736 #if MACH_ASSERT || DEBUG
6737 vm_object_unlock(m_object);
6738 #endif /* MACH_ASSERT || DEBUG */
6739 }
6740
6741 /*
6742 Grab locks for hibernate_page_list_setall()
6743 */
6744 void
6745 hibernate_vm_lock_queues(void)
6746 {
6747 vm_object_lock(compressor_object);
6748 vm_page_lock_queues();
6749 lck_mtx_lock(&vm_page_queue_free_lock);
6750 lck_mtx_lock(&vm_purgeable_queue_lock);
6751
6752 if (vm_page_local_q) {
6753 uint32_t i;
6754 for (i = 0; i < vm_page_local_q_count; i++) {
6755 struct vpl *lq;
6756 lq = &vm_page_local_q[i].vpl_un.vpl;
6757 VPL_LOCK(&lq->vpl_lock);
6758 }
6759 }
6760 }
6761
6762 void
6763 hibernate_vm_unlock_queues(void)
6764 {
6765 if (vm_page_local_q) {
6766 uint32_t i;
6767 for (i = 0; i < vm_page_local_q_count; i++) {
6768 struct vpl *lq;
6769 lq = &vm_page_local_q[i].vpl_un.vpl;
6770 VPL_UNLOCK(&lq->vpl_lock);
6771 }
6772 }
6773 lck_mtx_unlock(&vm_purgeable_queue_lock);
6774 lck_mtx_unlock(&vm_page_queue_free_lock);
6775 vm_page_unlock_queues();
6776 vm_object_unlock(compressor_object);
6777 }
6778
6779 /*
6780 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
6781 pages known to the VM to not need saving are subtracted.
6782 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6783 */
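
/*
 * Illustrative fragment of the bitmap convention (assumes 'pn' and the
 * lists are in scope): setting a page's bit marks it as NOT needing to
 * be saved in the image.
 */
#if 0
	/* page 'pn' is known to be reconstructible... don't save it */
	hibernate_page_bitset(page_list, TRUE, pn);
	hibernate_page_bitset(page_list_wired, TRUE, pn);

	/* later: a set bit means the page is skipped when writing the image */
	if (hibernate_page_bittst(page_list, pn)) {
		/* skip this page */
	}
#endif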
6784
6785 void
6786 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6787 hibernate_page_list_t * page_list_wired,
6788 hibernate_page_list_t * page_list_pal,
6789 boolean_t preflight,
6790 boolean_t will_discard,
6791 uint32_t * pagesOut)
6792 {
6793 uint64_t start, end, nsec;
6794 vm_page_t m;
6795 vm_page_t next;
6796 uint32_t pages = page_list->page_count;
6797 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6798 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6799 uint32_t count_wire = pages;
6800 uint32_t count_discard_active = 0;
6801 uint32_t count_discard_inactive = 0;
6802 uint32_t count_discard_cleaned = 0;
6803 uint32_t count_discard_purgeable = 0;
6804 uint32_t count_discard_speculative = 0;
6805 uint32_t count_discard_vm_struct_pages = 0;
6806 uint32_t i;
6807 uint32_t bank;
6808 hibernate_bitmap_t * bitmap;
6809 hibernate_bitmap_t * bitmap_wired;
6810 boolean_t discard_all;
6811 boolean_t discard;
6812
6813 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6814
6815 if (preflight) {
6816 page_list = NULL;
6817 page_list_wired = NULL;
6818 page_list_pal = NULL;
6819 discard_all = FALSE;
6820 } else {
6821 discard_all = will_discard;
6822 }
6823
6824 #if MACH_ASSERT || DEBUG
6825 if (!preflight)
6826 {
6827 assert(hibernate_vm_locks_are_safe());
6828 vm_page_lock_queues();
6829 if (vm_page_local_q) {
6830 for (i = 0; i < vm_page_local_q_count; i++) {
6831 struct vpl *lq;
6832 lq = &vm_page_local_q[i].vpl_un.vpl;
6833 VPL_LOCK(&lq->vpl_lock);
6834 }
6835 }
6836 }
6837 #endif /* MACH_ASSERT || DEBUG */
6838
6839
6840 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
6841
6842 clock_get_uptime(&start);
6843
6844 if (!preflight) {
6845 hibernate_page_list_zero(page_list);
6846 hibernate_page_list_zero(page_list_wired);
6847 hibernate_page_list_zero(page_list_pal);
6848
6849 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6850 hibernate_stats.cd_pages = pages;
6851 }
6852
6853 if (vm_page_local_q) {
6854 for (i = 0; i < vm_page_local_q_count; i++)
6855 vm_page_reactivate_local(i, TRUE, !preflight);
6856 }
6857
6858 if (preflight) {
6859 vm_object_lock(compressor_object);
6860 vm_page_lock_queues();
6861 lck_mtx_lock(&vm_page_queue_free_lock);
6862 }
6863
6864 m = (vm_page_t) hibernate_gobble_queue;
6865 while (m)
6866 {
6867 pages--;
6868 count_wire--;
6869 if (!preflight) {
6870 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6871 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6872 }
6873 m = m->snext;
6874 }
6875
6876 if (!preflight) for( i = 0; i < real_ncpus; i++ )
6877 {
6878 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6879 {
6880 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
6881 {
6882 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6883
6884 pages--;
6885 count_wire--;
6886 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6887 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6888
6889 hibernate_stats.cd_local_free++;
6890 hibernate_stats.cd_total_free++;
6891 }
6892 }
6893 }
6894
6895 for( i = 0; i < vm_colors; i++ )
6896 {
6897 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6898 m,
6899 vm_page_t,
6900 pageq)
6901 {
6902 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6903
6904 pages--;
6905 count_wire--;
6906 if (!preflight) {
6907 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6908 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6909
6910 hibernate_stats.cd_total_free++;
6911 }
6912 }
6913 }
6914
6915 vm_page_queue_iterate(&vm_lopage_queue_free,
6916 m,
6917 vm_page_t,
6918 pageq)
6919 {
6920 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6921
6922 pages--;
6923 count_wire--;
6924 if (!preflight) {
6925 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6926 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6927
6928 hibernate_stats.cd_total_free++;
6929 }
6930 }
6931
6932 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6933 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
6934 {
6935 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6936
6937 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6938 discard = FALSE;
6939 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6940 && hibernate_consider_discard(m, preflight))
6941 {
6942 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6943 count_discard_inactive++;
6944 discard = discard_all;
6945 }
6946 else
6947 count_throttled++;
6948 count_wire--;
6949 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6950
6951 if (discard) hibernate_discard_page(m);
6952 m = next;
6953 }
6954
6955 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6956 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6957 {
6958 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6959
6960 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6961 discard = FALSE;
6962 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6963 && hibernate_consider_discard(m, preflight))
6964 {
6965 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6966 if (m->dirty)
6967 count_discard_purgeable++;
6968 else
6969 count_discard_inactive++;
6970 discard = discard_all;
6971 }
6972 else
6973 count_anonymous++;
6974 count_wire--;
6975 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6976 if (discard) hibernate_discard_page(m);
6977 m = next;
6978 }
6979
6980 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6981 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6982 {
6983 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6984
6985 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6986 discard = FALSE;
6987 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6988 && hibernate_consider_discard(m, preflight))
6989 {
6990 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6991 if (m->dirty)
6992 count_discard_purgeable++;
6993 else
6994 count_discard_cleaned++;
6995 discard = discard_all;
6996 }
6997 else
6998 count_cleaned++;
6999 count_wire--;
7000 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7001 if (discard) hibernate_discard_page(m);
7002 m = next;
7003 }
7004
7005 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7006 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7007 {
7008 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7009
7010 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7011 discard = FALSE;
7012 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7013 && hibernate_consider_discard(m, preflight))
7014 {
7015 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7016 if (m->dirty)
7017 count_discard_purgeable++;
7018 else
7019 count_discard_active++;
7020 discard = discard_all;
7021 }
7022 else
7023 count_active++;
7024 count_wire--;
7025 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7026 if (discard) hibernate_discard_page(m);
7027 m = next;
7028 }
7029
7030 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7031 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7032 {
7033 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7034
7035 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7036 discard = FALSE;
7037 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7038 && hibernate_consider_discard(m, preflight))
7039 {
7040 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7041 if (m->dirty)
7042 count_discard_purgeable++;
7043 else
7044 count_discard_inactive++;
7045 discard = discard_all;
7046 }
7047 else
7048 count_inactive++;
7049 count_wire--;
7050 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7051 if (discard) hibernate_discard_page(m);
7052 m = next;
7053 }
7054 /* XXX FBDP TODO: secluded queue */
7055
7056 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7057 {
7058 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7059 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7060 {
7061 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7062
7063 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7064 discard = FALSE;
7065 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7066 && hibernate_consider_discard(m, preflight))
7067 {
7068 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7069 count_discard_speculative++;
7070 discard = discard_all;
7071 }
7072 else
7073 count_speculative++;
7074 count_wire--;
7075 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7076 if (discard) hibernate_discard_page(m);
7077 m = next;
7078 }
7079 }
7080
7081 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
7082 {
7083 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7084
7085 count_compressor++;
7086 count_wire--;
7087 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7088 }
7089
7090 if (preflight == FALSE && discard_all == TRUE) {
7091 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7092
7093 HIBLOG("hibernate_teardown started\n");
7094 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7095 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7096
7097 pages -= count_discard_vm_struct_pages;
7098 count_wire -= count_discard_vm_struct_pages;
7099
7100 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7101
7102 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7103 }
7104
7105 if (!preflight) {
7106 // pull wired from hibernate_bitmap
7107 bitmap = &page_list->bank_bitmap[0];
7108 bitmap_wired = &page_list_wired->bank_bitmap[0];
7109 for (bank = 0; bank < page_list->bank_count; bank++)
7110 {
7111 for (i = 0; i < bitmap->bitmapwords; i++)
7112 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7113 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7114 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7115 }
7116 }
7117
7118 // machine dependent adjustments
7119 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7120
7121 if (!preflight) {
7122 hibernate_stats.cd_count_wire = count_wire;
7123 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7124 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7125 }
7126
7127 clock_get_uptime(&end);
7128 absolutetime_to_nanoseconds(end - start, &nsec);
7129 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7130
7131 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7132 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7133 discard_all ? "did" : "could",
7134 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7135
7136 if (hibernate_stats.cd_skipped_xpmapped)
7137 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7138
7139 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7140
7141 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7142
7143 #if MACH_ASSERT || DEBUG
7144 if (!preflight)
7145 {
7146 if (vm_page_local_q) {
7147 for (i = 0; i < vm_page_local_q_count; i++) {
7148 struct vpl *lq;
7149 lq = &vm_page_local_q[i].vpl_un.vpl;
7150 VPL_UNLOCK(&lq->vpl_lock);
7151 }
7152 }
7153 vm_page_unlock_queues();
7154 }
7155 #endif /* MACH_ASSERT || DEBUG */
7156
7157 if (preflight) {
7158 lck_mtx_unlock(&vm_page_queue_free_lock);
7159 vm_page_unlock_queues();
7160 vm_object_unlock(compressor_object);
7161 }
7162
7163 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7164 }
7165
7166 void
7167 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7168 {
7169 uint64_t start, end, nsec;
7170 vm_page_t m;
7171 vm_page_t next;
7172 uint32_t i;
7173 uint32_t count_discard_active = 0;
7174 uint32_t count_discard_inactive = 0;
7175 uint32_t count_discard_purgeable = 0;
7176 uint32_t count_discard_cleaned = 0;
7177 uint32_t count_discard_speculative = 0;
7178
7179
7180 #if MACH_ASSERT || DEBUG
7181 vm_page_lock_queues();
7182 if (vm_page_local_q) {
7183 for (i = 0; i < vm_page_local_q_count; i++) {
7184 struct vpl *lq;
7185 lq = &vm_page_local_q[i].vpl_un.vpl;
7186 VPL_LOCK(&lq->vpl_lock);
7187 }
7188 }
7189 #endif /* MACH_ASSERT || DEBUG */
7190
7191 clock_get_uptime(&start);
7192
7193 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7194 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7195 {
7196 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7197
7198 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7199 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7200 {
7201 if (m->dirty)
7202 count_discard_purgeable++;
7203 else
7204 count_discard_inactive++;
7205 hibernate_discard_page(m);
7206 }
7207 m = next;
7208 }
7209
7210 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7211 {
7212 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7213 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7214 {
7215 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7216
7217 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7218 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7219 {
7220 count_discard_speculative++;
7221 hibernate_discard_page(m);
7222 }
7223 m = next;
7224 }
7225 }
7226
7227 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7228 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7229 {
7230 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7231
7232 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7233 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7234 {
7235 if (m->dirty)
7236 count_discard_purgeable++;
7237 else
7238 count_discard_inactive++;
7239 hibernate_discard_page(m);
7240 }
7241 m = next;
7242 }
7243 /* XXX FBDP TODO: secluded queue */
7244
7245 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7246 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7247 {
7248 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7249
7250 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7251 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7252 {
7253 if (m->dirty)
7254 count_discard_purgeable++;
7255 else
7256 count_discard_active++;
7257 hibernate_discard_page(m);
7258 }
7259 m = next;
7260 }
7261
7262 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7263 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7264 {
7265 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7266
7267 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7268 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7269 {
7270 if (m->dirty)
7271 count_discard_purgeable++;
7272 else
7273 count_discard_cleaned++;
7274 hibernate_discard_page(m);
7275 }
7276 m = next;
7277 }
7278
7279 #if MACH_ASSERT || DEBUG
7280 if (vm_page_local_q) {
7281 for (i = 0; i < vm_page_local_q_count; i++) {
7282 struct vpl *lq;
7283 lq = &vm_page_local_q[i].vpl_un.vpl;
7284 VPL_UNLOCK(&lq->vpl_lock);
7285 }
7286 }
7287 vm_page_unlock_queues();
7288 #endif /* MACH_ASSERT || DEBUG */
7289
7290 clock_get_uptime(&end);
7291 absolutetime_to_nanoseconds(end - start, &nsec);
7292 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7293 nsec / 1000000ULL,
7294 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7295 }
7296
7297 boolean_t hibernate_paddr_map_inited = FALSE;
7298 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7299 vm_page_t hibernate_rebuild_hash_list = NULL;
7300
7301 unsigned int hibernate_teardown_found_tabled_pages = 0;
7302 unsigned int hibernate_teardown_found_created_pages = 0;
7303 unsigned int hibernate_teardown_found_free_pages = 0;
7304 unsigned int hibernate_teardown_vm_page_free_count;
7305
7306
7307 struct ppnum_mapping {
7308 struct ppnum_mapping *ppnm_next;
7309 ppnum_t ppnm_base_paddr;
7310 unsigned int ppnm_sindx;
7311 unsigned int ppnm_eindx;
7312 };
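
/*
 * Each ppnum_mapping describes one physically contiguous run of vm_pages[]
 * entries: indices [ppnm_sindx, ppnm_eindx) map to physical pages
 * ppnm_base_paddr + (indx - ppnm_sindx).  For example (illustrative values
 * only), a run with ppnm_sindx = 100, ppnm_eindx = 200 and
 * ppnm_base_paddr = 0x8000 translates index 150 to ppnum 0x8032.
 */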
7313
7314 struct ppnum_mapping *ppnm_head;
7315 struct ppnum_mapping *ppnm_last_found = NULL;
7316
7317
7318 void
7319 hibernate_create_paddr_map()
7320 {
7321 unsigned int i;
7322 ppnum_t next_ppnum_in_run = 0;
7323 struct ppnum_mapping *ppnm = NULL;
7324
7325 if (hibernate_paddr_map_inited == FALSE) {
7326
7327 for (i = 0; i < vm_pages_count; i++) {
7328
7329 if (ppnm)
7330 ppnm->ppnm_eindx = i;
7331
7332 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7333
7334 ppnm = kalloc(sizeof(struct ppnum_mapping));
7335
7336 ppnm->ppnm_next = ppnm_head;
7337 ppnm_head = ppnm;
7338
7339 ppnm->ppnm_sindx = i;
7340 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7341 }
7342 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7343 }
7344 ppnm->ppnm_eindx++;
7345
7346 hibernate_paddr_map_inited = TRUE;
7347 }
7348 }
7349
7350 ppnum_t
7351 hibernate_lookup_paddr(unsigned int indx)
7352 {
7353 struct ppnum_mapping *ppnm = NULL;
7354
7355 ppnm = ppnm_last_found;
7356
7357 if (ppnm) {
7358 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7359 goto done;
7360 }
7361 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7362
7363 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7364 ppnm_last_found = ppnm;
7365 break;
7366 }
7367 }
7368 if (ppnm == NULL)
7369 panic("hibernate_lookup_paddr of %d failed\n", indx);
7370 done:
7371 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7372 }
7373
7374
7375 uint32_t
7376 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7377 {
7378 addr64_t saddr_aligned;
7379 addr64_t eaddr_aligned;
7380 addr64_t addr;
7381 ppnum_t paddr;
7382 unsigned int mark_as_unneeded_pages = 0;
7383
7384 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7385 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7386
7387 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7388
7389 paddr = pmap_find_phys(kernel_pmap, addr);
7390
7391 assert(paddr);
7392
7393 hibernate_page_bitset(page_list, TRUE, paddr);
7394 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7395
7396 mark_as_unneeded_pages++;
7397 }
7398 return (mark_as_unneeded_pages);
7399 }
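
/*
 * Illustrative note (hypothetical addresses, assuming 4KB pages): only pages
 * that lie entirely inside [saddr, eaddr) are marked unneeded, because saddr
 * is rounded up and eaddr is rounded down to a page boundary:
 *
 *	saddr = 0x1800 -> saddr_aligned = 0x2000
 *	eaddr = 0x5400 -> eaddr_aligned = 0x5000
 *	pages marked unneeded: 0x2000, 0x3000, 0x4000   (returns 3)
 */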
7400
7401
7402 void
7403 hibernate_hash_insert_page(vm_page_t mem)
7404 {
7405 vm_page_bucket_t *bucket;
7406 int hash_id;
7407 vm_object_t m_object;
7408
7409 m_object = VM_PAGE_OBJECT(mem);
7410
7411 assert(mem->hashed);
7412 assert(m_object);
7413 assert(mem->offset != (vm_object_offset_t) -1);
7414
7415 /*
7416 * Insert it into the object/offset hash table
7417 */
7418 hash_id = vm_page_hash(m_object, mem->offset);
7419 bucket = &vm_page_buckets[hash_id];
7420
7421 mem->next_m = bucket->page_list;
7422 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7423 }
7424
7425
7426 void
7427 hibernate_free_range(int sindx, int eindx)
7428 {
7429 vm_page_t mem;
7430 unsigned int color;
7431
7432 while (sindx < eindx) {
7433 mem = &vm_pages[sindx];
7434
7435 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7436
7437 mem->lopage = FALSE;
7438 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7439
7440 color = VM_PAGE_GET_COLOR(mem);
7441 #if defined(__x86_64__)
7442 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7443 mem,
7444 vm_page_t,
7445 pageq);
7446 #else
7447 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7448 mem,
7449 vm_page_t,
7450 pageq);
7451 #endif
7452 vm_page_free_count++;
7453
7454 sindx++;
7455 }
7456 }
7457
7458
7459 extern void hibernate_rebuild_pmap_structs(void);
7460
7461 void
7462 hibernate_rebuild_vm_structs(void)
7463 {
7464 int i, cindx, sindx, eindx;
7465 vm_page_t mem, tmem, mem_next;
7466 AbsoluteTime startTime, endTime;
7467 uint64_t nsec;
7468
7469 if (hibernate_rebuild_needed == FALSE)
7470 return;
7471
7472 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7473 HIBLOG("hibernate_rebuild started\n");
7474
7475 clock_get_uptime(&startTime);
7476
7477 hibernate_rebuild_pmap_structs();
7478
7479 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7480 eindx = vm_pages_count;
7481
7482 /*
7483 * Mark all the vm_pages[] that have not been initialized yet as being
7484 * transient. This is needed to ensure that the buddy page search is correct.
7485 * Without this, random data in these vm_pages[] entries can trip up the buddy search.
7486 */
7487 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7488 vm_pages[i].vm_page_q_state = VM_PAGE_NOT_ON_Q;
7489
7490 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7491
7492 mem = &vm_pages[cindx];
7493 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
7494 /*
7495 * hibernate_teardown_vm_structs leaves the location where
7496 * this vm_page_t must be located in "next".
7497 */
7498 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7499 mem->next_m = VM_PAGE_PACK_PTR(NULL);
7500
7501 sindx = (int)(tmem - &vm_pages[0]);
7502
7503 if (mem != tmem) {
7504 /*
7505 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7506 * so move it back to its real location
7507 */
7508 *tmem = *mem;
7509 mem = tmem;
7510 }
7511 if (mem->hashed)
7512 hibernate_hash_insert_page(mem);
7513 /*
7514 * the 'hole' between this vm_page_t and the previous
7515 * vm_page_t we moved needs to be initialized as
7516 * a range of free vm_page_t's
7517 */
7518 hibernate_free_range(sindx + 1, eindx);
7519
7520 eindx = sindx;
7521 }
7522 if (sindx)
7523 hibernate_free_range(0, sindx);
7524
7525 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7526
7527 /*
7528 * process the list of vm_page_t's that were entered in the hash,
7529 * but were not located in the vm_pages array... these are
7530 * vm_page_t's that were created on the fly (i.e. fictitious)
7531 */
7532 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7533 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7534
7535 mem->next_m = 0;
7536 hibernate_hash_insert_page(mem);
7537 }
7538 hibernate_rebuild_hash_list = NULL;
7539
7540 clock_get_uptime(&endTime);
7541 SUB_ABSOLUTETIME(&endTime, &startTime);
7542 absolutetime_to_nanoseconds(endTime, &nsec);
7543
7544 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7545
7546 hibernate_rebuild_needed = FALSE;
7547
7548 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7549 }
7550
7551
7552 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7553
7554 uint32_t
7555 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7556 {
7557 unsigned int i;
7558 unsigned int compact_target_indx;
7559 vm_page_t mem, mem_next;
7560 vm_page_bucket_t *bucket;
7561 unsigned int mark_as_unneeded_pages = 0;
7562 unsigned int unneeded_vm_page_bucket_pages = 0;
7563 unsigned int unneeded_vm_pages_pages = 0;
7564 unsigned int unneeded_pmap_pages = 0;
7565 addr64_t start_of_unneeded = 0;
7566 addr64_t end_of_unneeded = 0;
7567
7568
7569 if (hibernate_should_abort())
7570 return (0);
7571
7572 hibernate_rebuild_needed = TRUE;
7573
7574 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7575 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7576 vm_page_cleaned_count, compressor_object->resident_page_count);
7577
7578 for (i = 0; i < vm_page_bucket_count; i++) {
7579
7580 bucket = &vm_page_buckets[i];
7581
7582 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7583 assert(mem->hashed);
7584
7585 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7586
7587 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7588 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7589 hibernate_rebuild_hash_list = mem;
7590 }
7591 }
7592 }
7593 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7594 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7595
7596 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7597
7598 compact_target_indx = 0;
7599
7600 for (i = 0; i < vm_pages_count; i++) {
7601
7602 mem = &vm_pages[i];
7603
7604 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7605 unsigned int color;
7606
7607 assert(mem->busy);
7608 assert(!mem->lopage);
7609
7610 color = VM_PAGE_GET_COLOR(mem);
7611
7612 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7613 mem,
7614 vm_page_t,
7615 pageq);
7616
7617 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7618
7619 vm_page_free_count--;
7620
7621 hibernate_teardown_found_free_pages++;
7622
7623 if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
7624 compact_target_indx = i;
7625 } else {
7626 /*
7627 * record this vm_page_t's original location
7628 * we need this even if it doesn't get moved
7629 * as an indicator to the rebuild function that
7630 * we don't have to move it
7631 */
7632 mem->next_m = VM_PAGE_PACK_PTR(mem);
7633
7634 if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7635 /*
7636 * we've got a hole to fill, so
7637 * move this vm_page_t to its new home
7638 */
7639 vm_pages[compact_target_indx] = *mem;
7640 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7641
7642 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7643 compact_target_indx++;
7644 } else
7645 hibernate_teardown_last_valid_compact_indx = i;
7646 }
7647 }
7648 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7649 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7650 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7651
7652 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7653
7654 if (start_of_unneeded) {
7655 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7656 mark_as_unneeded_pages += unneeded_pmap_pages;
7657 }
7658 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7659
7660 return (mark_as_unneeded_pages);
7661 }
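
/*
 * Illustrative sketch of the compaction above (hypothetical 6-entry array,
 * F = page on the free queue, Un = page in use at index n):
 *
 *	before:  vm_pages[] = U0  F  U2  F  F  U5
 *	after:   vm_pages[] = U0 U2  U5  ...         last_valid_compact_indx = 2
 *
 * Every in-use page records its original slot in next_m (packed pointer to
 * itself) before it is moved, so hibernate_rebuild_vm_structs() can copy it
 * back and re-create the free ranges in between; everything past the last
 * valid compact index is then marked unneeded for the hibernation image.
 */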
7662
7663
7664 #endif /* HIBERNATION */
7665
7666 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7667
7668 #include <mach_vm_debug.h>
7669 #if MACH_VM_DEBUG
7670
7671 #include <mach_debug/hash_info.h>
7672 #include <vm/vm_debug.h>
7673
7674 /*
7675 * Routine: vm_page_info
7676 * Purpose:
7677 * Return information about the global VP table.
7678 * Fills the buffer with as much information as possible
7679 * and returns the desired size of the buffer.
7680 * Conditions:
7681 * Nothing locked. The caller should provide
7682 * possibly-pageable memory.
7683 */
7684
7685 unsigned int
7686 vm_page_info(
7687 hash_info_bucket_t *info,
7688 unsigned int count)
7689 {
7690 unsigned int i;
7691 lck_spin_t *bucket_lock;
7692
7693 if (vm_page_bucket_count < count)
7694 count = vm_page_bucket_count;
7695
7696 for (i = 0; i < count; i++) {
7697 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7698 unsigned int bucket_count = 0;
7699 vm_page_t m;
7700
7701 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7702 lck_spin_lock(bucket_lock);
7703
7704 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7705 m != VM_PAGE_NULL;
7706 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
7707 bucket_count++;
7708
7709 lck_spin_unlock(bucket_lock);
7710
7711 /* don't touch pageable memory while holding locks */
7712 info[i].hib_count = bucket_count;
7713 }
7714
7715 return vm_page_bucket_count;
7716 }
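
/*
 * Hypothetical caller of vm_page_info() (illustrative sketch only, not
 * compiled): size the buffer with an empty first call, then fill it.  The
 * kalloc()-based buffer management here is just for illustration.
 */
#if 0
static void
vm_page_info_usage_sketch(void)
{
	unsigned int desired, filled;
	hash_info_bucket_t *info;

	desired = vm_page_info(NULL, 0);	/* copies nothing, returns bucket count */
	info = kalloc(desired * sizeof(*info));
	if (info == NULL)
		return;
	filled = vm_page_info(info, desired);	/* fills min(desired, bucket count) entries */
	/* ... consume the first min(desired, filled) hib_count entries ... */
	kfree(info, desired * sizeof(*info));
	(void) filled;
}
#endif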
7717 #endif /* MACH_VM_DEBUG */
7718
7719 #if VM_PAGE_BUCKETS_CHECK
7720 void
7721 vm_page_buckets_check(void)
7722 {
7723 unsigned int i;
7724 vm_page_t p;
7725 unsigned int p_hash;
7726 vm_page_bucket_t *bucket;
7727 lck_spin_t *bucket_lock;
7728
7729 if (!vm_page_buckets_check_ready) {
7730 return;
7731 }
7732
7733 #if HIBERNATION
7734 if (hibernate_rebuild_needed ||
7735 hibernate_rebuild_hash_list) {
7736 panic("BUCKET_CHECK: hibernation in progress: "
7737 "rebuild_needed=%d rebuild_hash_list=%p\n",
7738 hibernate_rebuild_needed,
7739 hibernate_rebuild_hash_list);
7740 }
7741 #endif /* HIBERNATION */
7742
7743 #if VM_PAGE_FAKE_BUCKETS
7744 char *cp;
7745 for (cp = (char *) vm_page_fake_buckets_start;
7746 cp < (char *) vm_page_fake_buckets_end;
7747 cp++) {
7748 if (*cp != 0x5a) {
7749 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7750 "[0x%llx:0x%llx]\n",
7751 cp,
7752 (uint64_t) vm_page_fake_buckets_start,
7753 (uint64_t) vm_page_fake_buckets_end);
7754 }
7755 }
7756 #endif /* VM_PAGE_FAKE_BUCKETS */
7757
7758 for (i = 0; i < vm_page_bucket_count; i++) {
7759 vm_object_t p_object;
7760
7761 bucket = &vm_page_buckets[i];
7762 if (!bucket->page_list) {
7763 continue;
7764 }
7765
7766 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7767 lck_spin_lock(bucket_lock);
7768 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7769
7770 while (p != VM_PAGE_NULL) {
7771 p_object = VM_PAGE_OBJECT(p);
7772
7773 p_hash = vm_page_hash(p_object, p->offset);
7774 if (!p->hashed) {
7775 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7776 "hash %d in bucket %d at %p "
7777 "is not hashed\n",
7778 p, p_object, p->offset,
7779 p_hash, i, bucket);
7780 }
7781 if (p_hash != i) {
7782 panic("BUCKET_CHECK: corruption in bucket %d "
7783 "at %p: page %p object %p offset 0x%llx "
7784 "hash %d\n",
7785 i, bucket, p, p_object, p->offset,
7786 p_hash);
7787 }
7788 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
7789 }
7790 lck_spin_unlock(bucket_lock);
7791 }
7792
7793 // printf("BUCKET_CHECK: checked buckets\n");
7794 }
7795 #endif /* VM_PAGE_BUCKETS_CHECK */
7796
7797 /*
7798 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7799 * local queues if they exist... it's the only spot in the system where we add pages
7800 * to those queues... once on those queues, those pages can only move to one of the
7801 * global page queues or the free queues... they NEVER move from local q to local q.
7802 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7803 * the global vm_page_queue_lock at this point... we still need to take the local lock
7804 * in case this operation is being run on a different CPU than the one the local queue belongs to,
7805 * but we don't have to worry about the page moving to a global queue or becoming wired
7806 * while we're grabbing the local lock since those operations would require the global
7807 * vm_page_queue_lock to be held, and we already own it.
7808 *
7809 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7810 * 'wired' and local are ALWAYS mutually exclusive conditions.
7811 */
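
/*
 * A minimal sketch (not compiled) of the lock ordering described above for
 * pages on a local queue: the global page-queue lock is always taken before
 * the per-CPU VPL lock, never the other way around.  The helper name is
 * hypothetical.
 */
#if 0
static void
vm_page_local_q_lock_order_sketch(vm_page_t mem)
{
	struct vpl *lq;

	vm_page_lock_queues();			/* global vm_page_queue_lock first */
	lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
	VPL_LOCK(&lq->vpl_lock);		/* then the local queue's lock */
	/* ... safe to pull "mem" off lq->vpl_queue here ... */
	VPL_UNLOCK(&lq->vpl_lock);
	vm_page_unlock_queues();
}
#endif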
7812
7813 #if CONFIG_BACKGROUND_QUEUE
7814 void
7815 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
7816 #else
7817 void
7818 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
7819 #endif
7820 {
7821 boolean_t was_pageable = TRUE;
7822 vm_object_t m_object;
7823
7824 m_object = VM_PAGE_OBJECT(mem);
7825
7826 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7827
7828 if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
7829 {
7830 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7831 #if CONFIG_BACKGROUND_QUEUE
7832 if (remove_from_backgroundq == TRUE) {
7833 vm_page_remove_from_backgroundq(mem);
7834 }
7835 if (mem->vm_page_on_backgroundq) {
7836 assert(mem->vm_page_backgroundq.next != 0);
7837 assert(mem->vm_page_backgroundq.prev != 0);
7838 } else {
7839 assert(mem->vm_page_backgroundq.next == 0);
7840 assert(mem->vm_page_backgroundq.prev == 0);
7841 }
7842 #endif /* CONFIG_BACKGROUND_QUEUE */
7843 return;
7844 }
7845
7846 if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
7847 {
7848 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7849 #if CONFIG_BACKGROUND_QUEUE
7850 assert(mem->vm_page_backgroundq.next == 0 &&
7851 mem->vm_page_backgroundq.prev == 0 &&
7852 mem->vm_page_on_backgroundq == FALSE);
7853 #endif
7854 return;
7855 }
7856 if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
7857 /*
7858 * might put these guys on a list for debugging purposes
7859 * if we do, we'll need to remove this assert
7860 */
7861 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7862 #if CONFIG_BACKGROUND_QUEUE
7863 assert(mem->vm_page_backgroundq.next == 0 &&
7864 mem->vm_page_backgroundq.prev == 0 &&
7865 mem->vm_page_on_backgroundq == FALSE);
7866 #endif
7867 return;
7868 }
7869
7870 assert(m_object != compressor_object);
7871 assert(m_object != kernel_object);
7872 assert(m_object != vm_submap_object);
7873 assert(!mem->fictitious);
7874
7875 switch(mem->vm_page_q_state) {
7876
7877 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
7878 {
7879 struct vpl *lq;
7880
7881 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
7882 VPL_LOCK(&lq->vpl_lock);
7883 vm_page_queue_remove(&lq->vpl_queue,
7884 mem, vm_page_t, pageq);
7885 mem->local_id = 0;
7886 lq->vpl_count--;
7887 if (m_object->internal) {
7888 lq->vpl_internal_count--;
7889 } else {
7890 lq->vpl_external_count--;
7891 }
7892 VPL_UNLOCK(&lq->vpl_lock);
7893 was_pageable = FALSE;
7894 break;
7895 }
7896 case VM_PAGE_ON_ACTIVE_Q:
7897 {
7898 vm_page_queue_remove(&vm_page_queue_active,
7899 mem, vm_page_t, pageq);
7900 vm_page_active_count--;
7901 break;
7902 }
7903
7904 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
7905 {
7906 assert(m_object->internal == TRUE);
7907
7908 vm_page_inactive_count--;
7909 vm_page_queue_remove(&vm_page_queue_anonymous,
7910 mem, vm_page_t, pageq);
7911 vm_page_anonymous_count--;
7912 vm_purgeable_q_advance_all();
7913 break;
7914 }
7915
7916 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
7917 {
7918 assert(m_object->internal == FALSE);
7919
7920 vm_page_inactive_count--;
7921 vm_page_queue_remove(&vm_page_queue_inactive,
7922 mem, vm_page_t, pageq);
7923 vm_purgeable_q_advance_all();
7924 break;
7925 }
7926
7927 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
7928 {
7929 assert(m_object->internal == FALSE);
7930
7931 vm_page_inactive_count--;
7932 vm_page_queue_remove(&vm_page_queue_cleaned,
7933 mem, vm_page_t, pageq);
7934 vm_page_cleaned_count--;
7935 break;
7936 }
7937
7938 case VM_PAGE_ON_THROTTLED_Q:
7939 {
7940 assert(m_object->internal == TRUE);
7941
7942 vm_page_queue_remove(&vm_page_queue_throttled,
7943 mem, vm_page_t, pageq);
7944 vm_page_throttled_count--;
7945 was_pageable = FALSE;
7946 break;
7947 }
7948
7949 case VM_PAGE_ON_SPECULATIVE_Q:
7950 {
7951 assert(m_object->internal == FALSE);
7952
7953 vm_page_remque(&mem->pageq);
7954 vm_page_speculative_count--;
7955 break;
7956 }
7957
7958 #if CONFIG_SECLUDED_MEMORY
7959 case VM_PAGE_ON_SECLUDED_Q:
7960 {
7961 vm_page_queue_remove(&vm_page_queue_secluded,
7962 mem, vm_page_t, pageq);
7963 vm_page_secluded_count--;
7964 if (m_object == VM_OBJECT_NULL) {
7965 vm_page_secluded_count_free--;
7966 was_pageable = FALSE;
7967 } else {
7968 assert(!m_object->internal);
7969 vm_page_secluded_count_inuse--;
7970 was_pageable = FALSE;
7971 // was_pageable = TRUE;
7972 }
7973 break;
7974 }
7975 #endif /* CONFIG_SECLUDED_MEMORY */
7976
7977 default:
7978 {
7979 /*
7980 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7981 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7982 * the caller is responsible for determining if the page is on that queue, and if so, must
7983 * either first remove it (it needs both the page queues lock and the object lock to do
7984 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7985 *
7986 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7987 * or any of the undefined states
7988 */
7989 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
7990 break;
7991 }
7992
7993 }
7994 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7995 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7996
7997 #if CONFIG_BACKGROUND_QUEUE
7998 if (remove_from_backgroundq == TRUE)
7999 vm_page_remove_from_backgroundq(mem);
8000 #endif
8001 if (was_pageable) {
8002 if (m_object->internal) {
8003 vm_page_pageable_internal_count--;
8004 } else {
8005 vm_page_pageable_external_count--;
8006 }
8007 }
8008 }
8009
8010 void
8011 vm_page_remove_internal(vm_page_t page)
8012 {
8013 vm_object_t __object = VM_PAGE_OBJECT(page);
8014 if (page == __object->memq_hint) {
8015 vm_page_t __new_hint;
8016 vm_page_queue_entry_t __qe;
8017 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
8018 if (vm_page_queue_end(&__object->memq, __qe)) {
8019 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
8020 if (vm_page_queue_end(&__object->memq, __qe)) {
8021 __qe = NULL;
8022 }
8023 }
8024 __new_hint = (vm_page_t)((uintptr_t) __qe);
8025 __object->memq_hint = __new_hint;
8026 }
8027 vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
8028 #if CONFIG_SECLUDED_MEMORY
8029 if (__object->eligible_for_secluded) {
8030 vm_page_secluded.eligible_for_secluded--;
8031 }
8032 #endif /* CONFIG_SECLUDED_MEMORY */
8033 }
8034
8035 void
8036 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8037 {
8038 vm_object_t m_object;
8039
8040 m_object = VM_PAGE_OBJECT(mem);
8041
8042 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8043 assert(!mem->fictitious);
8044 assert(!mem->laundry);
8045 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8046 vm_page_check_pageable_safe(mem);
8047
8048 if (m_object->internal) {
8049 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8050
8051 if (first == TRUE)
8052 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8053 else
8054 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8055
8056 vm_page_anonymous_count++;
8057 vm_page_pageable_internal_count++;
8058 } else {
8059 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8060
8061 if (first == TRUE)
8062 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8063 else
8064 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8065
8066 vm_page_pageable_external_count++;
8067 }
8068 vm_page_inactive_count++;
8069 token_new_pagecount++;
8070
8071 #if CONFIG_BACKGROUND_QUEUE
8072 if (mem->vm_page_in_background)
8073 vm_page_add_to_backgroundq(mem, FALSE);
8074 #endif
8075 }
8076
8077 void
8078 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8079 {
8080 vm_object_t m_object;
8081
8082 m_object = VM_PAGE_OBJECT(mem);
8083
8084 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8085 assert(!mem->fictitious);
8086 assert(!mem->laundry);
8087 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8088 vm_page_check_pageable_safe(mem);
8089
8090 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
8091 if (first == TRUE)
8092 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
8093 else
8094 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
8095 vm_page_active_count++;
8096
8097 if (m_object->internal) {
8098 vm_page_pageable_internal_count++;
8099 } else {
8100 vm_page_pageable_external_count++;
8101 }
8102
8103 #if CONFIG_BACKGROUND_QUEUE
8104 if (mem->vm_page_in_background)
8105 vm_page_add_to_backgroundq(mem, FALSE);
8106 #endif
8107 }
8108
8109 /*
8110 * Pages from special kernel objects shouldn't
8111 * be placed on pageable queues.
8112 */
8113 void
8114 vm_page_check_pageable_safe(vm_page_t page)
8115 {
8116 vm_object_t page_object;
8117
8118 page_object = VM_PAGE_OBJECT(page);
8119
8120 if (page_object == kernel_object) {
8121 panic("vm_page_check_pageable_safe: trying to add page " \
8122 "from kernel object (%p) to pageable queue", kernel_object);
8123 }
8124
8125 if (page_object == compressor_object) {
8126 panic("vm_page_check_pageable_safe: trying to add page " \
8127 "from compressor object (%p) to pageable queue", compressor_object);
8128 }
8129
8130 if (page_object == vm_submap_object) {
8131 panic("vm_page_check_pageable_safe: trying to add page " \
8132 "from submap object (%p) to pageable queue", vm_submap_object);
8133 }
8134 }
8135
8136 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8137 * wired page diagnose
8138 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8139
8140 #include <libkern/OSKextLibPrivate.h>
8141
8142 #define KA_SIZE(namelen, subtotalscount) \
8143 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8144
8145 #define KA_NAME(alloc) \
8146 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8147
8148 #define KA_NAME_LEN(alloc) \
8149 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
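
/*
 * Illustrative layout (an assumption drawn from KA_SIZE()/KA_NAME() above;
 * shown only schematically): a site allocated with subtotalscount = 2 and
 * the hypothetical name "mytag" is one contiguous kalloc block,
 *
 *	[ vm_allocation_site header | subtotals[0] | subtotals[1] | "mytag\0" ]
 *
 * KA_NAME() points just past the subtotals array and the name length is
 * stashed in the flags field via VM_TAG_NAME_LEN_SHIFT.
 */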
8150
8151 vm_tag_t
8152 vm_tag_bt(void)
8153 {
8154 uintptr_t* frameptr;
8155 uintptr_t* frameptr_next;
8156 uintptr_t retaddr;
8157 uintptr_t kstackb, kstackt;
8158 const vm_allocation_site_t * site;
8159 thread_t cthread;
8160 kern_allocation_name_t name;
8161
8162 cthread = current_thread();
8163 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8164
8165 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8166 {
8167 if (!name->tag) vm_tag_alloc(name);
8168 return name->tag;
8169 }
8170
8171 kstackb = cthread->kernel_stack;
8172 kstackt = kstackb + kernel_stack_size;
8173
8174 /* Load stack frame pointer (EBP on x86) into frameptr */
8175 frameptr = __builtin_frame_address(0);
8176 site = NULL;
8177 while (frameptr != NULL)
8178 {
8179 /* Verify thread stack bounds */
8180 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8181
8182 /* Next frame pointer is pointed to by the previous one */
8183 frameptr_next = (uintptr_t*) *frameptr;
8184
8185 /* Pull return address from one spot above the frame pointer */
8186 retaddr = *(frameptr + 1);
8187
8188
8189 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8190 {
8191 site = OSKextGetAllocationSiteForCaller(retaddr);
8192 break;
8193 }
8194 frameptr = frameptr_next;
8195 }
8196
8197 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8198 }
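
/*
 * Note on the walk above: with frame pointers enabled each frame is laid out
 * as { saved frame pointer, return address, ... }, so *frameptr is the
 * caller's frame and *(frameptr + 1) its return address.  The loop stops at
 * the first return address outside [vm_kernel_stext, vm_kernel_top] -- i.e.
 * presumably in a kext -- and asks OSKext for the allocation site registered
 * for that caller.
 */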
8199
8200 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8201
8202 void
8203 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8204 {
8205 vm_tag_t tag;
8206 uint64_t avail;
8207 uint32_t idx;
8208 vm_allocation_site_t * prev;
8209
8210 if (site->tag) return;
8211
8212 idx = 0;
8213 while (TRUE)
8214 {
8215 avail = free_tag_bits[idx];
8216 if (avail)
8217 {
8218 tag = __builtin_clzll(avail);
8219 avail &= ~(1ULL << (63 - tag));
8220 free_tag_bits[idx] = avail;
8221 tag += (idx << 6);
8222 break;
8223 }
8224 idx++;
8225 if (idx >= ARRAY_COUNT(free_tag_bits))
8226 {
8227 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8228 {
8229 prev = vm_allocation_sites[idx];
8230 if (!prev) continue;
8231 if (!KA_NAME_LEN(prev)) continue;
8232 if (!prev->tag) continue;
8233 if (prev->total) continue;
8234 if (1 != prev->refcount) continue;
8235
8236 assert(idx == prev->tag);
8237 tag = idx;
8238 prev->tag = VM_KERN_MEMORY_NONE;
8239 *releasesiteP = prev;
8240 break;
8241 }
8242 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8243 {
8244 tag = VM_KERN_MEMORY_ANY;
8245 }
8246 break;
8247 }
8248 }
8249 site->tag = tag;
8250
8251 OSAddAtomic16(1, &site->refcount);
8252
8253 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8254
8255 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8256 }
8257
8258 static void
8259 vm_tag_free_locked(vm_tag_t tag)
8260 {
8261 uint64_t avail;
8262 uint32_t idx;
8263 uint64_t bit;
8264
8265 if (VM_KERN_MEMORY_ANY == tag) return;
8266
8267 idx = (tag >> 6);
8268 avail = free_tag_bits[idx];
8269 tag &= 63;
8270 bit = (1ULL << (63 - tag));
8271 assert(!(avail & bit));
8272 free_tag_bits[idx] = (avail | bit);
8273 }
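
/*
 * Worked example (illustrative) of the free-tag bitmap encoding used by
 * vm_tag_alloc_locked()/vm_tag_free_locked(): a free tag t is bit
 * (63 - (t & 63)) of free_tag_bits[t >> 6], so lower-numbered tags occupy
 * higher-order bits and are handed out first.  If word 0 has only tags 5
 * and 9 free:
 *
 *	avail                     = (1ULL << 58) | (1ULL << 54)
 *	__builtin_clzll(avail)    = 5            -> tag 5 is allocated
 *	avail after clearing bit  = (1ULL << 54)  -> only tag 9 remains free
 */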
8274
8275 static void
8276 vm_tag_init(void)
8277 {
8278 vm_tag_t tag;
8279 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8280 {
8281 vm_tag_free_locked(tag);
8282 }
8283
8284 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8285 {
8286 vm_tag_free_locked(tag);
8287 }
8288 }
8289
8290 vm_tag_t
8291 vm_tag_alloc(vm_allocation_site_t * site)
8292 {
8293 vm_tag_t tag;
8294 vm_allocation_site_t * releasesite;
8295
8296 if (VM_TAG_BT & site->flags)
8297 {
8298 tag = vm_tag_bt();
8299 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8300 }
8301
8302 if (!site->tag)
8303 {
8304 releasesite = NULL;
8305 lck_spin_lock(&vm_allocation_sites_lock);
8306 vm_tag_alloc_locked(site, &releasesite);
8307 lck_spin_unlock(&vm_allocation_sites_lock);
8308 if (releasesite) kern_allocation_name_release(releasesite);
8309 }
8310
8311 return (site->tag);
8312 }
8313
8314 void
8315 vm_tag_update_size(vm_tag_t tag, int64_t delta)
8316 {
8317 vm_allocation_site_t * allocation;
8318 uint64_t prior;
8319
8320 assert(VM_KERN_MEMORY_NONE != tag);
8321 assert(tag < VM_MAX_TAG_VALUE);
8322
8323 allocation = vm_allocation_sites[tag];
8324 assert(allocation);
8325
8326 if (delta < 0) {
8327 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8328 }
8329 prior = OSAddAtomic64(delta, &allocation->total);
8330
8331 #if DEBUG || DEVELOPMENT
8332
8333 uint64_t new, peak;
8334 new = prior + delta;
8335 do
8336 {
8337 peak = allocation->peak;
8338 if (new <= peak) break;
8339 }
8340 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8341
8342 #endif /* DEBUG || DEVELOPMENT */
8343
8344 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8345
8346 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8347 }
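
/*
 * Note on the DEBUG/DEVELOPMENT peak tracking above (the same pattern is used
 * in kern_allocation_update_size() below): the compare-and-swap loop re-reads
 * "peak" and retries until either the new total no longer exceeds it or the
 * swap succeeds, so the recorded peak never moves backwards even with
 * concurrent updaters.
 */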
8348
8349 void
8350 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8351 {
8352 uint64_t prior;
8353
8354 if (delta < 0) {
8355 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8356 }
8357 prior = OSAddAtomic64(delta, &allocation->total);
8358
8359 #if DEBUG || DEVELOPMENT
8360
8361 uint64_t new, peak;
8362 new = prior + delta;
8363 do
8364 {
8365 peak = allocation->peak;
8366 if (new <= peak) break;
8367 }
8368 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8369
8370 #endif /* DEBUG || DEVELOPMENT */
8371
8372 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8373 }
8374
8375 #if VM_MAX_TAG_ZONES
8376
8377 void
8378 vm_allocation_zones_init(void)
8379 {
8380 kern_return_t ret;
8381 vm_offset_t addr;
8382 vm_size_t size;
8383
8384 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8385 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8386
8387 ret = kernel_memory_allocate(kernel_map,
8388 &addr, round_page(size), 0,
8389 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8390 assert(KERN_SUCCESS == ret);
8391
8392 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8393 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8394
8395 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8396 // in vm_tag_update_zone_size() won't recurse
8397 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8398 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8399 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8400 }
8401
8402 void
8403 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8404 {
8405 vm_allocation_zone_total_t * zone;
8406
8407 assert(VM_KERN_MEMORY_NONE != tag);
8408 assert(tag < VM_MAX_TAG_VALUE);
8409
8410 if (zidx >= VM_MAX_TAG_ZONES) return;
8411
8412 zone = vm_allocation_zone_totals[tag];
8413 if (!zone)
8414 {
8415 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8416 if (!zone) return;
8417 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8418 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8419 {
8420 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8421 }
8422 }
8423 }
8424
8425 void
8426 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8427 {
8428 vm_allocation_zone_total_t * zone;
8429 uint32_t new;
8430
8431 assert(VM_KERN_MEMORY_NONE != tag);
8432 assert(tag < VM_MAX_TAG_VALUE);
8433
8434 if (zidx >= VM_MAX_TAG_ZONES) return;
8435
8436 zone = vm_allocation_zone_totals[tag];
8437 assert(zone);
8438 zone += zidx;
8439
8440 /* the zone is locked */
8441 if (delta < 0)
8442 {
8443 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8444 zone->total += delta;
8445 }
8446 else
8447 {
8448 zone->total += delta;
8449 if (zone->total > zone->peak) zone->peak = zone->total;
8450 if (dwaste)
8451 {
8452 new = zone->waste;
8453 if (zone->wastediv < 65536) zone->wastediv++;
8454 else new -= (new >> 16);
8455 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8456 assert(!ov);
8457 zone->waste = new;
8458 }
8459 }
8460 }
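
/*
 * Note on the waste accounting above (hypothetical numbers): "waste" is a
 * running sum of per-element slack and "wastediv" counts the updates that
 * fed it, saturating at 65536.  Once saturated, each update first decays the
 * sum by 1/65536 (new -= new >> 16) before adding dwaste, so waste/wastediv
 * approximates the recent average waste per element -- e.g. a saturated
 * waste of 65536 * 24 suggests roughly 24 wasted bytes per element.
 * process_account() later scales that average by the element count
 * (total / elem_size) to report collectable_bytes.
 */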
8461
8462 #endif /* VM_MAX_TAG_ZONES */
8463
8464 void
8465 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8466 {
8467 kern_allocation_name_t other;
8468 struct vm_allocation_total * total;
8469 uint32_t subidx;
8470
8471 subidx = 0;
8472 assert(VM_KERN_MEMORY_NONE != subtag);
8473 for (; subidx < allocation->subtotalscount; subidx++)
8474 {
8475 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8476 {
8477 allocation->subtotals[subidx].tag = subtag;
8478 break;
8479 }
8480 if (subtag == allocation->subtotals[subidx].tag) break;
8481 }
8482 assert(subidx < allocation->subtotalscount);
8483 if (subidx >= allocation->subtotalscount) return;
8484
8485 total = &allocation->subtotals[subidx];
8486 other = vm_allocation_sites[subtag];
8487 assert(other);
8488
8489 if (delta < 0)
8490 {
8491 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8492 OSAddAtomic64(delta, &total->total);
8493 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8494 OSAddAtomic64(delta, &other->mapped);
8495 }
8496 else
8497 {
8498 OSAddAtomic64(delta, &other->mapped);
8499 OSAddAtomic64(delta, &total->total);
8500 }
8501 }
8502
8503 const char *
8504 kern_allocation_get_name(kern_allocation_name_t allocation)
8505 {
8506 return (KA_NAME(allocation));
8507 }
8508
8509 kern_allocation_name_t
8510 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8511 {
8512 uint32_t namelen;
8513
8514 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8515
8516 kern_allocation_name_t allocation;
8517 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8518 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8519
8520 allocation->refcount = 1;
8521 allocation->subtotalscount = subtotalscount;
8522 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8523 strlcpy(KA_NAME(allocation), name, namelen + 1);
8524
8525 return (allocation);
8526 }
8527
8528 void
8529 kern_allocation_name_release(kern_allocation_name_t allocation)
8530 {
8531 assert(allocation->refcount > 0);
8532 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8533 {
8534 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8535 }
8536 }
8537
8538 vm_tag_t
8539 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8540 {
8541 return (vm_tag_alloc(allocation));
8542 }
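
/*
 * Hypothetical lifecycle of a named allocation counter using the routines
 * above (illustrative sketch only, not compiled; the name and sizes are made
 * up):
 */
#if 0
static void
kern_allocation_name_usage_sketch(void)
{
	kern_allocation_name_t name;

	name = kern_allocation_name_allocate("com.example.cache", 0);
	kern_allocation_update_size(name, 4096);	/* account one 4K page   */
	kern_allocation_update_size(name, -4096);	/* ... and its release   */
	kern_allocation_name_release(name);		/* drops the last ref    */
}
#endif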
8543
8544 static void
8545 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8546 {
8547 if (!object->wired_page_count) return;
8548 if (object != kernel_object)
8549 {
8550 assert(object->wire_tag < num_info);
8551 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8552 }
8553 }
8554
8555 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8556 unsigned int num_info, vm_object_t object);
8557
8558 static void
8559 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8560 vm_page_iterate_proc proc, purgeable_q_t queue,
8561 int group)
8562 {
8563 vm_object_t object;
8564
8565 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8566 !queue_end(&queue->objq[group], (queue_entry_t) object);
8567 object = (vm_object_t) queue_next(&object->objq))
8568 {
8569 proc(info, num_info, object);
8570 }
8571 }
8572
8573 static void
8574 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8575 vm_page_iterate_proc proc)
8576 {
8577 purgeable_q_t volatile_q;
8578 queue_head_t * nonvolatile_q;
8579 vm_object_t object;
8580 int group;
8581
8582 lck_spin_lock(&vm_objects_wired_lock);
8583 queue_iterate(&vm_objects_wired,
8584 object,
8585 vm_object_t,
8586 objq)
8587 {
8588 proc(info, num_info, object);
8589 }
8590 lck_spin_unlock(&vm_objects_wired_lock);
8591
8592 lck_mtx_lock(&vm_purgeable_queue_lock);
8593 nonvolatile_q = &purgeable_nonvolatile_queue;
8594 for (object = (vm_object_t) queue_first(nonvolatile_q);
8595 !queue_end(nonvolatile_q, (queue_entry_t) object);
8596 object = (vm_object_t) queue_next(&object->objq))
8597 {
8598 proc(info, num_info, object);
8599 }
8600
8601 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
8602 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, 0);
8603
8604 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
8605 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8606 {
8607 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8608 }
8609
8610 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
8611 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8612 {
8613 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8614 }
8615 lck_mtx_unlock(&vm_purgeable_queue_lock);
8616 }
8617
8618 static uint64_t
8619 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8620 {
8621 size_t namelen;
8622 unsigned int idx, count, nextinfo;
8623 vm_allocation_site_t * site;
8624 lck_spin_lock(&vm_allocation_sites_lock);
8625
8626 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8627 {
8628 site = vm_allocation_sites[idx];
8629 if (!site) continue;
8630 info[idx].mapped = site->mapped;
8631 info[idx].tag = site->tag;
8632 if (!iterated)
8633 {
8634 info[idx].size = site->total;
8635 #if DEBUG || DEVELOPMENT
8636 info[idx].peak = site->peak;
8637 #endif /* DEBUG || DEVELOPMENT */
8638 }
8639 else
8640 {
8641 if (!site->subtotalscount && (site->total != info[idx].size))
8642 {
8643 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8644 info[idx].size = site->total;
8645 }
8646 }
8647 }
8648
8649 nextinfo = (vm_allocation_tag_highest + 1);
8650 count = nextinfo;
8651 if (count >= num_info) count = num_info;
8652
8653 for (idx = 0; idx < count; idx++)
8654 {
8655 site = vm_allocation_sites[idx];
8656 if (!site) continue;
8657 info[idx].flags |= VM_KERN_SITE_WIRED;
8658 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8659 {
8660 info[idx].site = idx;
8661 info[idx].flags |= VM_KERN_SITE_TAG;
8662 if (VM_KERN_MEMORY_ZONE == idx)
8663 {
8664 info[idx].flags |= VM_KERN_SITE_HIDE;
8665 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8666 info[idx].collectable_bytes = zones_collectable_bytes;
8667 }
8668 }
8669 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8670 {
8671 info[idx].site = 0;
8672 info[idx].flags |= VM_KERN_SITE_NAMED;
8673 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8674 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8675 }
8676 else if (VM_TAG_KMOD & site->flags)
8677 {
8678 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8679 info[idx].flags |= VM_KERN_SITE_KMOD;
8680 }
8681 else
8682 {
8683 info[idx].site = VM_KERNEL_UNSLIDE(site);
8684 info[idx].flags |= VM_KERN_SITE_KERNEL;
8685 }
8686 #if VM_MAX_TAG_ZONES
8687 vm_allocation_zone_total_t * zone;
8688 unsigned int zidx;
8689 vm_size_t elem_size;
8690
8691 if (vm_allocation_zone_totals
8692 && (zone = vm_allocation_zone_totals[idx])
8693 && (nextinfo < num_info))
8694 {
8695 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8696 {
8697 if (!zone[zidx].peak) continue;
8698 info[nextinfo] = info[idx];
8699 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8700 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8701 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8702 info[nextinfo].size = zone[zidx].total;
8703 info[nextinfo].peak = zone[zidx].peak;
8704 info[nextinfo].mapped = 0;
8705 if (zone[zidx].wastediv)
8706 {
8707 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8708 }
8709 nextinfo++;
8710 }
8711 }
8712 #endif /* VM_MAX_TAG_ZONES */
8713 if (site->subtotalscount)
8714 {
8715 uint64_t mapped, mapcost, take;
8716 uint32_t sub;
8717 vm_tag_t alloctag;
8718
8719 info[idx].size = site->total;
8720 mapped = info[idx].size;
8721 info[idx].mapped = mapped;
8722 mapcost = 0;
8723 for (sub = 0; sub < site->subtotalscount; sub++)
8724 {
8725 alloctag = site->subtotals[sub].tag;
8726 assert(alloctag < num_info);
8727 if (info[alloctag].name[0]) continue;
8728 take = info[alloctag].mapped;
8729 if (take > info[alloctag].size) take = info[alloctag].size;
8730 if (take > mapped) take = mapped;
8731 info[alloctag].mapped -= take;
8732 info[alloctag].size -= take;
8733 mapped -= take;
8734 mapcost += take;
8735 }
8736 info[idx].size = mapcost;
8737 }
8738 }
8739 lck_spin_unlock(&vm_allocation_sites_lock);
8740
8741 return (0);
8742 }
8743
8744 uint32_t
8745 vm_page_diagnose_estimate(void)
8746 {
8747 vm_allocation_site_t * site;
8748 uint32_t count;
8749 uint32_t idx;
8750
8751 lck_spin_lock(&vm_allocation_sites_lock);
8752 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8753 {
8754 site = vm_allocation_sites[idx];
8755 if (!site) continue;
8756 count++;
8757 #if VM_MAX_TAG_ZONES
8758 if (vm_allocation_zone_totals)
8759 {
8760 vm_allocation_zone_total_t * zone;
8761 zone = vm_allocation_zone_totals[idx];
8762 if (!zone) continue;
8763 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8764 }
8765 #endif
8766 }
8767 lck_spin_unlock(&vm_allocation_sites_lock);
8768
8769 /* some slop for new tags created */
8770 count += 8;
8771 count += VM_KERN_COUNTER_COUNT;
8772
8773 return (count);
8774 }
8775
8776
8777 kern_return_t
8778 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8779 {
8780 uint64_t wired_size;
8781 uint64_t wired_managed_size;
8782 uint64_t wired_reserved_size;
8783 uint64_t booter_size;
8784 boolean_t iterate;
8785 mach_memory_info_t * counts;
8786
8787 bzero(info, num_info * sizeof(mach_memory_info_t));
8788
8789 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8790
8791 #if CONFIG_EMBEDDED
8792 wired_size = ptoa_64(vm_page_wire_count);
8793 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8794 #else
8795 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8796 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8797 #endif
8798 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8799
8800 booter_size = ml_get_booter_memory_size();
8801 wired_size += booter_size;
8802
8803 assert(num_info >= VM_KERN_COUNTER_COUNT);
8804 num_info -= VM_KERN_COUNTER_COUNT;
8805 counts = &info[num_info];
8806
8807 #define SET_COUNT(xcount, xsize, xflags) \
8808 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8809 counts[xcount].site = (xcount); \
8810 counts[xcount].size = (xsize); \
8811 counts[xcount].mapped = (xsize); \
8812 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8813
8814 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8815 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8816 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8817 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8818 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8819 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8820 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8821 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8822
8823 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8824 counts[xcount].site = (xcount); \
8825 counts[xcount].size = (xsize); \
8826 counts[xcount].mapped = (xsize); \
8827 counts[xcount].free = (xfree); \
8828 counts[xcount].largest = (xlargest); \
8829 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8830
8831 vm_map_size_t map_size, map_free, map_largest;
8832
8833 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
8834 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
8835
8836 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
8837 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
8838
8839 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
8840 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
8841
8842 iterate = !VM_TAG_ACTIVE_UPDATE;
8843 if (iterate)
8844 {
8845 enum { kMaxKernelDepth = 1 };
8846 vm_map_t maps [kMaxKernelDepth];
8847 vm_map_entry_t entries[kMaxKernelDepth];
8848 vm_map_t map;
8849 vm_map_entry_t entry;
8850 vm_object_offset_t offset;
8851 vm_page_t page;
8852 int stackIdx, count;
8853
8854 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
8855
8856 map = kernel_map;
8857 stackIdx = 0;
8858 while (map)
8859 {
8860 vm_map_lock(map);
8861 for (entry = map->hdr.links.next; map; entry = entry->links.next)
8862 {
8863 if (entry->is_sub_map)
8864 {
8865 assert(stackIdx < kMaxKernelDepth);
8866 maps[stackIdx] = map;
8867 entries[stackIdx] = entry;
8868 stackIdx++;
8869 map = VME_SUBMAP(entry);
8870 entry = NULL;
8871 break;
8872 }
8873 if (VME_OBJECT(entry) == kernel_object)
8874 {
8875 count = 0;
8876 vm_object_lock(VME_OBJECT(entry));
8877 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
8878 {
8879 page = vm_page_lookup(VME_OBJECT(entry), offset);
8880 if (page && VM_PAGE_WIRED(page)) count++;
8881 }
8882 vm_object_unlock(VME_OBJECT(entry));
8883
8884 if (count)
8885 {
8886 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
8887 assert(VME_ALIAS(entry) < num_info);
8888 info[VME_ALIAS(entry)].size += ptoa_64(count);
8889 }
8890 }
8891 while (map && (entry == vm_map_last_entry(map)))
8892 {
8893 vm_map_unlock(map);
8894 if (!stackIdx) map = NULL;
8895 else
8896 {
8897 --stackIdx;
8898 map = maps[stackIdx];
8899 entry = entries[stackIdx];
8900 }
8901 }
8902 }
8903 }
8904 }
8905
8906 process_account(info, num_info, zones_collectable_bytes, iterate);
8907
8908 return (KERN_SUCCESS);
8909 }
8910
8911 #if DEBUG || DEVELOPMENT
8912
8913 kern_return_t
8914 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
8915 {
8916 kern_return_t ret;
8917 vm_size_t zsize;
8918 vm_map_t map;
8919 vm_map_entry_t entry;
8920
8921 zsize = zone_element_info((void *) addr, tag);
8922 if (zsize)
8923 {
8924 *zone_size = *size = zsize;
8925 return (KERN_SUCCESS);
8926 }
8927
8928 *zone_size = 0;
8929 ret = KERN_INVALID_ADDRESS;
8930 for (map = kernel_map; map; )
8931 {
8932 vm_map_lock(map);
8933 if (!vm_map_lookup_entry(map, addr, &entry)) break;
8934 if (entry->is_sub_map)
8935 {
8936 if (map != kernel_map) break;
8937 map = VME_SUBMAP(entry);
8938 continue;
8939 }
8940 if (entry->vme_start != addr) break;
8941 *tag = VME_ALIAS(entry);
8942 *size = (entry->vme_end - addr);
8943 ret = KERN_SUCCESS;
8944 break;
8945 }
8946 if (map != kernel_map) vm_map_unlock(map);
8947 vm_map_unlock(kernel_map);
8948
8949 return (ret);
8950 }
8951
8952 #endif /* DEBUG || DEVELOPMENT */
8953
8954 uint32_t
8955 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8956 {
8957 vm_allocation_site_t * site;
8958 uint32_t kmodId;
8959
8960 kmodId = 0;
8961 lck_spin_lock(&vm_allocation_sites_lock);
8962 if ((site = vm_allocation_sites[tag]))
8963 {
8964 if (VM_TAG_KMOD & site->flags)
8965 {
8966 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8967 }
8968 }
8969 lck_spin_unlock(&vm_allocation_sites_lock);
8970
8971 return (kmodId);
8972 }