/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t	page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;
#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;
/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
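/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * checks in vm_set_page_size() rely on the power-of-two identities
 * (mask & size) == 0 and mask == size - 1.  The standalone helper below
 * (hypothetical name: derive_page_geometry) shows the same shift/mask
 * derivation in isolation.
 */
#if 0	/* example only, never compiled */
#include <stdint.h>

struct page_geometry {
	uintptr_t	size;	/* must be a power of two */
	uintptr_t	mask;	/* size - 1 */
	unsigned	shift;	/* log2(size) */
};

static int
derive_page_geometry(uintptr_t size, struct page_geometry *g)
{
	unsigned shift;

	if (size == 0 || (size & (size - 1)) != 0)
		return -1;			/* not a power of two */
	for (shift = 0; ((uintptr_t)1 << shift) != size; shift++)
		continue;
	g->size  = size;
	g->mask  = size - 1;			/* (mask & size) == 0 holds here */
	g->shift = shift;
	return 0;
}
#endif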
#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}
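/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * because vm_page_set_colors() forces the color count to a power of two,
 * vm_color_mask == vm_colors - 1 and "next color" selection reduces to a
 * single AND, which is what vm_page_grab() does below with
 * (color + 1) & vm_color_mask.  The helper name next_color is hypothetical.
 */
#if 0	/* example only, never compiled */
static unsigned int
next_color(unsigned int color, unsigned int color_mask)
{
	/* equivalent to (color + 1) % (color_mask + 1) when the count is a power of two */
	return (color + 1) & color_mask;
}
#endif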
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

	vm_compressor_init_locks();
}
void
vm_page_init_local_q()
{
	unsigned int		num_cpus;
	unsigned int		i;
	struct vplq		*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
			lq->vpl_count = 0;
			lq->vpl_internal_count = 0;
			lq->vpl_external_count = 0;
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->compressor = FALSE;
	m->written_by_kernel = FALSE;
	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
#endif
	}
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_cleaned);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_anonymous);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	kernel_debug_string("zone_steal_memory");
	zone_steal_memory();
	kernel_debug_string("vm_map_steal_memory");
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;
	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *		hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set the low bit - it must always be 1 to ensure a unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
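	/*
	 * Editor's note (worked example, not in the original source): with
	 * roughly 500,000 physical pages the sizing loop above picks
	 * vm_page_bucket_count = 524288 (2^19), so vm_page_hash_mask is
	 * 0x7ffff and, with BUCKETS_PER_LOCK == 16, there are 32768 bucket
	 * locks.  Because the count is a power of two,
	 * (vm_page_hash_mask & vm_page_bucket_count) == 0 and the warning
	 * above stays quiet.
	 */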
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
		vm_map_round_page((vm_page_fake_buckets_start +
				   (vm_page_bucket_count *
				    sizeof (vm_page_bucket_t))),
				  PAGE_MASK);
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	     cp < (char *)vm_page_fake_buckets_end;
	     cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
	kernel_debug_string("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	kernel_debug_string("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	kernel_debug_string("vm_page_bootstrap complete");
	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size up to an integral multiple of sizeof (void *).
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
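	/*
	 * Editor's note (worked example, not in the original source): with
	 * sizeof (void *) == 8, a request of 13 bytes becomes
	 * (13 + 7) & ~7 == 16, while a request that is already a multiple
	 * of 8 is unchanged, e.g. (16 + 7) & ~7 == 16.
	 */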
	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}

void vm_page_release_startup(vm_page_t mem);

void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

#if defined(__LP64__)
	/*
	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
	 */
	assert(sizeof(struct vm_page) == 64);

	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
	/*
	 *	Initialize the page frames.
	 */
	kernel_debug_string("Initialize the page frames");
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if defined(__LP64__)

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
	kernel_debug_string("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;
#if 0
	{
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;						/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
		queue_iterate(&vm_page_queue_free[i],
			      xx,
			      vm_page_t,
			      pageq) {	/* BRINGUP */
			j++;						/* (BRINGUP) */
			if(j > vm_page_free_count) {			/* (BRINGUP) */
				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
			}

			l = vm_page_free_count - j;			/* (BRINGUP) */
			k = 0;						/* (BRINGUP) */

			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

			for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
				k++;
				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
				}
			}

			xxl = xx;
		}
	}

	if(j != vm_page_free_count) {				/* (BRINGUP) */
		panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
	}
#endif

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}
/*
 *	Routine:	vm_page_create
 *
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		m->fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
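/*
 * Editor's note -- illustrative sketch, not part of the original file: a
 * lookup maps the (object, offset) pair to a bucket index with
 * vm_page_hash() and then to the spin lock covering that bucket, since each
 * lock protects BUCKETS_PER_LOCK consecutive buckets.  The helper name
 * bucket_and_lock_index is hypothetical.
 */
#if 0	/* example only, never compiled */
static void
bucket_and_lock_index(
	vm_object_t		object,
	vm_object_offset_t	offset,
	unsigned int		*bucket_idx,
	unsigned int		*lock_idx)
{
	unsigned int hash_id = vm_page_hash(object, offset);

	*bucket_idx = hash_id;				/* index into vm_page_buckets[] */
	*lock_idx   = hash_id / BUCKETS_PER_LOCK;	/* index into vm_page_bucket_locks[] */
}
#endif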
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash,
	boolean_t		batch_pmap_op)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;
	task_t			owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);
#if 0
	/*
	 * we may not hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif

	assert(page_aligned(offset));

	/* the vm_submap_object is only a placeholder for submaps */
	assert(object != vm_submap_object);

	vm_object_lock_assert_exclusive(object);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
					: LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

	if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);
#endif
		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next_m = bucket->page_list;
		bucket->page_list = VM_PAGE_PACK_PTR(mem);
		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if     MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = TRUE;
		lck_spin_unlock(bucket_lock);
	}

	{
		unsigned int	cache_attr;

		cache_attr = object->wimg_bits & VM_WIMG_MASK;

		if (cache_attr != VM_WIMG_USE_DEFAULT) {
			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
		}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	if (object->internal) {
		OSAddAtomic(1, &vm_page_internal_count);
	} else {
		OSAddAtomic(1, &vm_page_external_count);
	}

	/*
	 * It wouldn't make sense to insert a "reusable" page in
	 * an object (the page would have been marked "reusable" only
	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
	 * in the object at that time).
	 * But a page could be inserted in a "all_reusable" object, if
	 * something faults it in (a vm_read() from another task or a
	 * "use-after-free" issue in user space, for example). It can
	 * also happen if we're relocating a page from that object to
	 * a different physical page during a physically-contiguous
	 * allocation.
	 */
	assert(!mem->reusable);
	if (mem->object->all_reusable) {
		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
	}

	if (object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = object->vo_purgeable_owner;
	}
	if (owner &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* more non-volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_nonvolatile,
			      PAGE_SIZE);
		/* more footprint */
		ledger_credit(owner->ledger,
			      task_ledgers.phys_footprint,
			      PAGE_SIZE);
	} else if (owner &&
		   (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* more volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_volatile,
			      PAGE_SIZE);
	}

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(+1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}

#if VM_OBJECT_TRACKING_OP_MODIFIED
	if (vm_object_tracking_inited &&
	    object->internal &&
	    object->resident_page_count == 0 &&
	    object->pager == NULL &&
	    object->shadow != NULL &&
	    object->shadow->copy == object) {
		void *bt[VM_OBJECT_TRACKING_BTDEPTH];
		int numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
				object,
				VM_OBJECT_TRACKING_OP_MODIFIED,
				bt,
				numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			for (prev = &this->next_m;
			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (mem->object->internal) {
#if DEBUG
		assert(vm_page_internal_count);
#endif /* DEBUG */
		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = mem->object->vo_purgeable_owner;
	}
	if (owner &&
	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */
		VM_PAGE_CHECK(mem);
#endif
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else {
		vm_page_lookup_miss++;
	}

	return (mem);
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	boolean_t	internal_to_external, external_to_internal;

	assert(mem->object != new_object);

	/*
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	if (mem->local) {
		/*
		 * it's much easier to get the vm_page_pageable_xxx accounting correct
		 * if we first move the page to the active queue... it's going to end
		 * up there anyway, and we don't do vm_page_rename's frequently enough
		 * for this to matter.
		 */
		VM_PAGE_QUEUES_REMOVE(mem);
		vm_page_activate(mem);
	}
	if (mem->active || mem->inactive || mem->speculative) {
		if (mem->object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!mem->object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	assert(phys_page);

#if	DEBUG
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
#endif
	*mem = vm_page_template;
	mem->phys_page = phys_page;
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

static vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}
/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so it fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2.  5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	/* Increment zone page count. We account for all memory managed by the zone in z->page_count */
	OSAddAtomic64(1, &(vm_page_zone->page_count));

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}



/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		mem->busy = TRUE;

		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(mem->phys_page));

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 * first try to grab a page from the per-cpu free list...
 * this must be done while pre-emption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move to the per-cpu queue...
 * return 1 of these pages when done... only wakeup the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */
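/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * fast path described above is a per-CPU "magazine" of free pages.  The
 * generic shape of that pattern is sketched below with hypothetical names
 * (cpu_cache_t, global_pop, refill_batch); the real vm_page_grab() also
 * disables preemption around the per-CPU list, honors the reserved-page
 * limits, and refills from the per-color global queues.
 */
#if 0	/* example only, never compiled */
typedef struct page page_t;
struct page { page_t *next; };

typedef struct {
	page_t	*free_list;	/* singly linked per-CPU cache */
} cpu_cache_t;

static page_t *
magazine_grab(
	cpu_cache_t	*cache,
	page_t		*(*global_pop)(void),	/* takes the global free-list lock */
	unsigned int	refill_batch)
{
	page_t	*p = cache->free_list;

	if (p != NULL) {			/* fast path: no global lock taken */
		cache->free_list = p->next;
		return p;
	}
	/* slow path: move a batch from the global queue into the per-CPU cache */
	while (refill_batch-- > 0) {
		page_t *q = global_pop();

		if (q == NULL)
			break;
		q->next = cache->free_list;
		cache->free_list = q;
	}
	p = cache->free_list;
	if (p != NULL)
		cache->free_list = p->next;
	return p;				/* NULL if the global queue was empty too */
}
#endif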
vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;


	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;

		enable_preemption();
		mem->pageq.next = NULL;

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(pmap_verify_free(mem->phys_page));
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!pmap_is_noencrypt(mem->phys_page));

		return mem;
	}
	enable_preemption();
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count
>= VM_PAGE_WIRE_COUNT_WARNING
) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count
);
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count
>= VM_PAGE_GOBBLE_COUNT_WARNING
) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count
);
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2104 if ((vm_page_free_count
< vm_page_free_reserved
) &&
2105 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock
);
        unsigned int    pages_to_steal;
        unsigned int    color;

        while ( vm_page_free_count == 0 ) {

            lck_mtx_unlock(&vm_page_queue_free_lock);
            /*
             * must be a privileged thread to be
             * in this state since a non-privileged
             * thread would have bailed if we were
             * under the vm_page_free_reserved mark
             */
            VM_PAGE_WAIT();
            lck_mtx_lock_spin(&vm_page_queue_free_lock);
        }

        disable_preemption();

        if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
            lck_mtx_unlock(&vm_page_queue_free_lock);

            /*
             * we got preempted and moved to another processor
             * or we got preempted and someone else ran and filled the cache
             */
            goto return_page_from_cpu_list;
        }
        if (vm_page_free_count <= vm_page_free_reserved)
            pages_to_steal = 1;
        else {
            if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
                pages_to_steal = vm_free_magazine_refill_limit;
            else
                pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
        }
        color = PROCESSOR_DATA(current_processor(), start_color);
        head = tail = NULL;

        vm_page_free_count -= pages_to_steal;

        while (pages_to_steal--) {

            while (queue_empty(&vm_page_queue_free[color]))
                color = (color + 1) & vm_color_mask;

            queue_remove_first(&vm_page_queue_free[color],
                               mem,
                               vm_page_t,
                               pageq);
            mem->pageq.next = NULL;
            mem->pageq.prev = NULL;

            assert(!mem->active);
            assert(!mem->inactive);
            assert(!mem->throttled);
            assert(!mem->speculative);

            color = (color + 1) & vm_color_mask;

            if (head == NULL)
                head = mem;
            else
                tail->pageq.next = (queue_t)mem;
            tail = mem;

            assert(mem->listq.next == NULL && mem->listq.prev == NULL);
            assert(mem->tabled == FALSE);
            assert(mem->object == VM_OBJECT_NULL);
            assert(!mem->laundry);

            assert(pmap_verify_free(mem->phys_page));
            assert(!mem->encrypted);
            assert(!mem->pmapped);
            assert(!mem->wpmapped);
            assert(!pmap_is_noencrypt(mem->phys_page));
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);

        PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
        PROCESSOR_DATA(current_processor(), start_color) = color;

        /*
         * satisfy this request
         */
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;

        mem = head;
        mem->pageq.next = NULL;

        enable_preemption();
    }
    /*
     *  Decide if we should poke the pageout daemon.
     *  We do this if the free count is less than the low
     *  water mark, or if the free count is less than the high
     *  water mark (but above the low water mark) and the inactive
     *  count is less than its target.
     *
     *  We don't have the counts locked ... if they change a little,
     *  it doesn't really matter.
     */
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    VM_CHECK_MEMORYSTATUS;

//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);  /* (TEST/DEBUG) */

    return mem;
}
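/*
 * For illustration only (not part of the original source): callers that
 * must have a page typically pair vm_page_grab() with a wait, e.g.
 *
 *      for (;;) {
 *              mem = vm_page_grab();
 *              if (mem != VM_PAGE_NULL)
 *                      break;
 *              VM_PAGE_WAIT();
 *      }
 *
 * where VM_PAGE_WAIT() is assumed to wrap vm_page_wait(THREAD_UNINT).
 * Only threads with TH_OPT_VMPRIV may dip below vm_page_free_reserved,
 * so an ordinary caller blocks here until the pageout daemon frees memory.
 */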
/*
 *  vm_page_release:
 *
 *  Return a page to the free list.
 */

void
vm_page_release(
    register vm_page_t  mem)
{
    unsigned int    color;
    int     need_wakeup = 0;
    int     need_priv_wakeup = 0;

    assert(!mem->private && !mem->fictitious);
    if (vm_page_free_verify) {
        assert(pmap_verify_free(mem->phys_page));
    }
//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);  /* (TEST/DEBUG) */

    pmap_clear_noencrypt(mem->phys_page);

    lck_mtx_lock_spin(&vm_page_queue_free_lock);
#if DEBUG
    if (mem->free)
        panic("vm_page_release");
#endif

    assert(!mem->laundry);
    assert(mem->object == VM_OBJECT_NULL);
    assert(mem->pageq.next == NULL &&
           mem->pageq.prev == NULL);
    assert(mem->listq.next == NULL &&
           mem->listq.prev == NULL);

    if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
        vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        /*
         * this exists to support hardware controllers
         * incapable of generating DMAs with more than 32 bits
         * of address on platforms with physical memory > 4G...
         */
        queue_enter_first(&vm_lopage_queue_free,
                          mem,
                          vm_page_t,
                          pageq);
        vm_lopage_free_count++;

        if (vm_lopage_free_count >= vm_lopage_free_limit)
            vm_lopage_refill = FALSE;

        mem->lopage = TRUE;
    } else {
        mem->lopage = FALSE;
        mem->free = TRUE;

        color = mem->phys_page & vm_color_mask;
        queue_enter_first(&vm_page_queue_free[color],
                          mem,
                          vm_page_t,
                          pageq);
        vm_page_free_count++;
        /*
         *  Check if we should wake up someone waiting for a page.
         *  But don't bother waking them unless they can allocate.
         *
         *  We wakeup only one thread, to prevent starvation.
         *  Because the scheduling system handles wait queues FIFO,
         *  if we wakeup all waiting threads, one greedy thread
         *  can starve multiple niceguy threads.  When the threads
         *  all wakeup, the greedy thread runs first, grabs the page,
         *  and waits for another page.  It will be the first to run
         *  when the next page is freed.
         *
         *  However, there is a slight danger here.
         *  The thread we wake might not use the free page.
         *  Then the other threads could wait indefinitely
         *  while the page goes unused.  To forestall this,
         *  the pageout daemon will keep making free pages
         *  as long as vm_page_free_wanted is non-zero.
         */

        assert(vm_page_free_count > 0);
        if (vm_page_free_wanted_privileged > 0) {
            vm_page_free_wanted_privileged--;
            need_priv_wakeup = 1;
        } else if (vm_page_free_wanted > 0 &&
                   vm_page_free_count > vm_page_free_reserved) {
            vm_page_free_wanted--;
            need_wakeup = 1;
        }
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);

    if (need_priv_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
    else if (need_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_count);

    VM_CHECK_MEMORYSTATUS;
}
/*
 * This version of vm_page_release() is used only at startup
 * when we are single-threaded and pages are being released
 * for the first time. Hence, no locking or unnecessary checks are made.
 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
    register vm_page_t  mem)
{
    queue_t     queue_free;

    if (vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        mem->lopage = TRUE;
        vm_lopage_free_count++;
        queue_free = &vm_lopage_queue_free;
    } else {
        mem->lopage = FALSE;
        mem->free = TRUE;
        vm_page_free_count++;
        queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
    }
    queue_enter_first(queue_free, mem, vm_page_t, pageq);
}
/*
 *  vm_page_wait:
 *
 *  Wait for a page to become available.
 *  If there are plenty of free pages, then we don't sleep.
 *
 *  Returns:
 *      TRUE:  There may be another page, try again
 *      FALSE: We were interrupted out of our wait, don't try again
 */

boolean_t
vm_page_wait(
    int interruptible )
{
    /*
     *  We can't use vm_page_free_reserved to make this
     *  determination.  Consider: some thread might
     *  need to allocate two pages.  The first allocation
     *  succeeds, the second fails.  After the first page is freed,
     *  a call to vm_page_wait must really block.
     */
    kern_return_t   wait_result;
    int             need_wakeup = 0;
    int             is_privileged = current_thread()->options & TH_OPT_VMPRIV;

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if (is_privileged && vm_page_free_count) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
    if (vm_page_free_count < vm_page_free_target) {

        if (is_privileged) {
            if (vm_page_free_wanted_privileged++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
        } else {
            if (vm_page_free_wanted++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
        counter(c_vm_page_wait_block++);

        if (need_wakeup)
            thread_wakeup((event_t)&vm_page_free_wanted);

        if (wait_result == THREAD_WAITING) {
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
                           vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
            wait_result = thread_block(THREAD_CONTINUE_NULL);
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
        }

        return (wait_result == THREAD_AWAKENED);
    } else {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
}
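/*
 * Privileged waiters block on &vm_page_free_wanted_privileged while
 * ordinary waiters block on &vm_page_free_count, so vm_page_release()
 * can service the pageout path first without waking every waiter.  A
 * caller simply retries its allocation when this returns TRUE and gives
 * up when it returns FALSE (the wait was interrupted).
 */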
/*
 *  vm_page_alloc:
 *
 *  Allocate and return a memory cell associated
 *  with this VM object/offset pair.
 *
 *  Object must be locked.
 */

vm_page_t
vm_page_alloc(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}

vm_page_t
vm_page_alloclo(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grablo();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}


/*
 *  vm_page_alloc_guard:
 *
 *  Allocate a fictitious page which will be used
 *  as a guard page.  The page will be inserted into
 *  the object and returned to the caller.
 */

vm_page_t
vm_page_alloc_guard(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab_guard();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}

counter(unsigned int c_laundry_pages_freed = 0;)
/*
 *  vm_page_free_prepare:
 *
 *  Removes page from any queue it may be on
 *  and disassociates it from its VM object.
 *
 *  Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
    vm_page_t   mem)
{
    vm_page_free_prepare_queues(mem);
    vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
    vm_page_t   mem)
{
    VM_PAGE_CHECK(mem);

    assert(!mem->free);
    assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
    if (mem->free)
        panic("vm_page_free: freeing page on free list\n");
#endif  /* MACH_ASSERT || DEBUG */
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    }
    if (mem->laundry) {
        /*
         * We may have to free a page while it's being laundered
         * if we lost its pager (due to a forced unmount, for example).
         * We need to call vm_pageout_steal_laundry() before removing
         * the page from its VM object, so that we can remove it
         * from its pageout queue and adjust the laundry accounting
         */
        vm_pageout_steal_laundry(mem, TRUE);
        counter(++c_laundry_pages_freed);
    }

    VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */

    if (VM_PAGE_WIRED(mem)) {
        if (mem->object) {
            assert(mem->object->wired_page_count > 0);
            mem->object->wired_page_count--;
            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);

            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                OSAddAtomic(+1, &vm_page_purgeable_count);
                assert(vm_page_purgeable_wired_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_wired_count);
            }
            if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
                 mem->object->purgable == VM_PURGABLE_EMPTY) &&
                mem->object->vo_purgeable_owner != TASK_NULL) {
                task_t  owner;

                owner = mem->object->vo_purgeable_owner;
                /*
                 * While wired, this page was accounted
                 * as "non-volatile" but it should now
                 * be accounted as "volatile".
                 */
                /* one less "non-volatile"... */
                ledger_debit(owner->ledger,
                             task_ledgers.purgeable_nonvolatile,
                             PAGE_SIZE);
                /* ... and "phys_footprint" */
                ledger_debit(owner->ledger,
                             task_ledgers.phys_footprint,
                             PAGE_SIZE);
                /* one more "volatile" */
                ledger_credit(owner->ledger,
                              task_ledgers.purgeable_volatile,
                              PAGE_SIZE);
            }
        }
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        mem->wire_count = 0;
        assert(!mem->gobbled);
    } else if (mem->gobbled) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
    }
}
void
vm_page_free_prepare_object(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    if (mem->tabled)
        vm_page_remove(mem, remove_from_hash);  /* clears tabled, object, offset */

    PAGE_WAKEUP(mem);       /* clears wanted */

    if (mem->private) {
        mem->private = FALSE;
        mem->fictitious = TRUE;
        mem->phys_page = vm_page_fictitious_addr;
    }
    if ( !mem->fictitious) {
        vm_page_init(mem, mem->phys_page, mem->lopage);
    }
}
/*
 *  vm_page_free:
 *
 *  Returns the given page to the free list,
 *  disassociating it from any VM object.
 *
 *  Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
    vm_page_t   mem)
{
    vm_page_free_prepare(mem);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}


void
vm_page_free_unlocked(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    vm_page_lockspin_queues();
    vm_page_free_prepare_queues(mem);
    vm_page_unlock_queues();

    vm_page_free_prepare_object(mem, remove_from_hash);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
/*
 *  Free a list of pages.  The list can be up to several hundred pages,
 *  as blocked up by vm_pageout_scan().
 *  The big win is not having to take the free list lock once per page.
 */
void
vm_page_free_list(
    vm_page_t   freeq,
    boolean_t   prepare_object)
{
    vm_page_t   mem;
    vm_page_t   nxt;
    vm_page_t   local_freeq;
    int         pg_count;

    while (freeq) {

        pg_count = 0;
        local_freeq = VM_PAGE_NULL;
        mem = freeq;

        /*
         * break up the processing into smaller chunks so
         * that we can 'pipeline' the pages onto the
         * free list w/o introducing too much
         * contention on the global free queue lock
         */
        while (mem && pg_count < 64) {

            assert(!mem->inactive);
            assert(!mem->active);
            assert(!mem->throttled);
            assert(!mem->speculative);
            assert(!VM_PAGE_WIRED(mem));
            assert(mem->pageq.prev == NULL);

            nxt = (vm_page_t)(mem->pageq.next);

            if (vm_page_free_verify && !mem->fictitious && !mem->private) {
                assert(pmap_verify_free(mem->phys_page));
            }
            if (prepare_object == TRUE)
                vm_page_free_prepare_object(mem, TRUE);

            if (!mem->fictitious) {

                if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
                    vm_lopage_free_count < vm_lopage_free_limit &&
                    mem->phys_page < max_valid_low_ppnum) {
                    mem->pageq.next = NULL;
                    vm_page_release(mem);
                } else {
                    /*
                     * IMPORTANT: we can't set the page "free" here
                     * because that would make the page eligible for
                     * a physically-contiguous allocation (see
                     * vm_page_find_contiguous()) right away (we don't
                     * hold the vm_page_queue_free lock).  That would
                     * cause trouble because the page is not actually
                     * in the free queue yet...
                     */
                    mem->pageq.next = (queue_entry_t)local_freeq;
                    local_freeq = mem;
                    pg_count++;

                    pmap_clear_noencrypt(mem->phys_page);
                }
            } else {
                assert(mem->phys_page == vm_page_fictitious_addr ||
                       mem->phys_page == vm_page_guard_addr);
                vm_page_release_fictitious(mem);
            }
            mem = nxt;
        }
        freeq = mem;

        if ( (mem = local_freeq) ) {
            unsigned int    avail_free_count;
            unsigned int    need_wakeup = 0;
            unsigned int    need_priv_wakeup = 0;

            lck_mtx_lock_spin(&vm_page_queue_free_lock);

            while (mem) {
                int     color;

                nxt = (vm_page_t)(mem->pageq.next);

                mem->free = TRUE;

                color = mem->phys_page & vm_color_mask;
                queue_enter_first(&vm_page_queue_free[color],
                                  mem,
                                  vm_page_t,
                                  pageq);
                mem = nxt;
            }
            vm_page_free_count += pg_count;
            avail_free_count = vm_page_free_count;

            if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

                if (avail_free_count < vm_page_free_wanted_privileged) {
                    need_priv_wakeup = avail_free_count;
                    vm_page_free_wanted_privileged -= avail_free_count;
                    avail_free_count = 0;
                } else {
                    need_priv_wakeup = vm_page_free_wanted_privileged;
                    vm_page_free_wanted_privileged = 0;
                    avail_free_count -= need_priv_wakeup;
                }
            }
            if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
                unsigned int    available_pages;

                available_pages = avail_free_count - vm_page_free_reserved;

                if (available_pages >= vm_page_free_wanted) {
                    need_wakeup = vm_page_free_wanted;
                    vm_page_free_wanted = 0;
                } else {
                    need_wakeup = available_pages;
                    vm_page_free_wanted -= available_pages;
                }
            }
            lck_mtx_unlock(&vm_page_queue_free_lock);

            if (need_priv_wakeup != 0) {
                /*
                 * There shouldn't be that many VM-privileged threads,
                 * so let's wake them all up, even if we don't quite
                 * have enough pages to satisfy them all.
                 */
                thread_wakeup((event_t)&vm_page_free_wanted_privileged);
            }
            if (need_wakeup != 0 && vm_page_free_wanted == 0) {
                /*
                 * We don't expect to have any more waiters
                 * after this, so let's wake them all up at
                 * once.
                 */
                thread_wakeup((event_t) &vm_page_free_count);
            } else for (; need_wakeup != 0; need_wakeup--) {
                /*
                 * Wake up one waiter per page we just released.
                 */
                thread_wakeup_one((event_t) &vm_page_free_count);
            }

            VM_CHECK_MEMORYSTATUS;
        }
    }
}
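/*
 * The list handed to vm_page_free_list() is chained through pageq.next,
 * the same field used for the per-cpu free magazines, which is why each
 * page's pageq.prev is asserted NULL on entry.  Processing in chunks of
 * 64 keeps the hold time on vm_page_queue_free_lock short.  A sketch of
 * the caller side (illustrative, not from this file):
 *
 *      page->pageq.next = (queue_entry_t) local_list;
 *      local_list = page;
 *      if (++local_count >= batch_size) {
 *              vm_page_free_list(local_list, TRUE);
 *              local_list = VM_PAGE_NULL;
 *              local_count = 0;
 *      }
 */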
/*
 *  vm_page_wire:
 *
 *  Mark this page as wired down by yet
 *  another map, removing it from paging queues
 *  as necessary.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_wire(
    register vm_page_t  mem)
{

//  dbgLog(current_thread(), mem->offset, mem->object, 1);  /* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    } else {
        /*
         * In theory, the page should be in an object before it
         * gets wired, since we need to hold the object lock
         * to update some fields in the page structure.
         * However, some code (i386 pmap, for example) might want
         * to wire a page before it gets inserted into an object.
         * That's somewhat OK, as long as nobody else can get to
         * that page and update it at the same time.
         */
    }
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if ( !VM_PAGE_WIRED(mem)) {

        if (mem->pageout_queue) {
            mem->pageout = FALSE;
            vm_pageout_throttle_up(mem);
        }
        VM_PAGE_QUEUES_REMOVE(mem);

        if (mem->object) {
            mem->object->wired_page_count++;
            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);
            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                assert(vm_page_purgeable_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_count);
                OSAddAtomic(1, &vm_page_purgeable_wired_count);
            }
            if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
                 mem->object->purgable == VM_PURGABLE_EMPTY) &&
                mem->object->vo_purgeable_owner != TASK_NULL) {
                task_t  owner;

                owner = mem->object->vo_purgeable_owner;
                /* less volatile bytes */
                ledger_debit(owner->ledger,
                             task_ledgers.purgeable_volatile,
                             PAGE_SIZE);
                /* more not-quite-volatile bytes */
                ledger_credit(owner->ledger,
                              task_ledgers.purgeable_nonvolatile,
                              PAGE_SIZE);
                /* more footprint */
                ledger_credit(owner->ledger,
                              task_ledgers.phys_footprint,
                              PAGE_SIZE);
            }
            if (mem->object->all_reusable) {
                /*
                 * Wired pages are not counted as "re-usable"
                 * in "all_reusable" VM objects, so nothing
                 * to do here.
                 */
            } else if (mem->reusable) {
                /*
                 * This page is not "re-usable" when it's
                 * wired, so adjust its state and the
                 * accounting.
                 */
                vm_object_reuse_pages(mem->object,
                                      mem->offset,
                                      mem->offset+PAGE_SIZE_64,
                                      FALSE);
            }
        }
        assert(!mem->reusable);

        if (!mem->private && !mem->fictitious && !mem->gobbled)
            vm_page_wire_count++;
        if (mem->gobbled)
            vm_page_gobble_count--;
        mem->gobbled = FALSE;

        VM_CHECK_MEMORYSTATUS;

        /*
         * The page could be encrypted, but
         * We don't have to decrypt it here
         * because we don't guarantee that the
         * data is actually valid at this point.
         * The page will get decrypted in
         * vm_fault_wire() if needed.
         */
    }
    assert(!mem->gobbled);
    mem->wire_count++;
    VM_PAGE_CHECK(mem);
}
/*
 *  vm_page_gobble:
 *
 *  Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *  Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
    register vm_page_t  mem)
{
    vm_page_lockspin_queues();
    VM_PAGE_CHECK(mem);

    assert(!mem->gobbled);
    assert( !VM_PAGE_WIRED(mem));

    if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count++;
    }
    vm_page_gobble_count++;
    mem->gobbled = TRUE;
    vm_page_unlock_queues();
}
/*
 *  vm_page_unwire:
 *
 *  Release one wiring of this page, potentially
 *  enabling it to be paged again.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
    vm_page_t   mem,
    boolean_t   queueit)
{

//  dbgLog(current_thread(), mem->offset, mem->object, 0);  /* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    assert(VM_PAGE_WIRED(mem));
    assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
    vm_object_lock_assert_exclusive(mem->object);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if (--mem->wire_count == 0) {
        assert(!mem->private && !mem->fictitious);
        vm_page_wire_count--;
        assert(mem->object->wired_page_count > 0);
        mem->object->wired_page_count--;
        assert(mem->object->resident_page_count >=
               mem->object->wired_page_count);
        if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
            OSAddAtomic(+1, &vm_page_purgeable_count);
            assert(vm_page_purgeable_wired_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_wired_count);
        }
        if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
             mem->object->purgable == VM_PURGABLE_EMPTY) &&
            mem->object->vo_purgeable_owner != TASK_NULL) {
            task_t  owner;

            owner = mem->object->vo_purgeable_owner;
            /* more volatile bytes */
            ledger_credit(owner->ledger,
                          task_ledgers.purgeable_volatile,
                          PAGE_SIZE);
            /* less not-quite-volatile bytes */
            ledger_debit(owner->ledger,
                         task_ledgers.purgeable_nonvolatile,
                         PAGE_SIZE);
            /* less footprint */
            ledger_debit(owner->ledger,
                         task_ledgers.phys_footprint,
                         PAGE_SIZE);
        }
        assert(mem->object != kernel_object);
        assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

        if (queueit == TRUE) {
            if (mem->object->purgable == VM_PURGABLE_EMPTY) {
                vm_page_deactivate(mem);
            } else {
                vm_page_activate(mem);
            }
        }

        VM_CHECK_MEMORYSTATUS;
    }
    VM_PAGE_CHECK(mem);
}
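/*
 * vm_page_wire() and vm_page_unwire() keep three sets of counters in
 * step: the per-object wired_page_count, the global vm_page_wire_count,
 * and, for volatile purgeable objects, the owning task's ledgers.  The
 * ledger moves here mirror the ones in vm_page_wire(): wiring converts
 * "volatile" bytes into "nonvolatile" plus "phys_footprint", unwiring
 * converts them back, one PAGE_SIZE at a time.
 */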
/*
 *  vm_page_deactivate:
 *
 *  Returns the given page to the inactive list,
 *  indicating that no physical maps have access
 *  to this page.  [Used by the physical mapping system.]
 *
 *  The page queues must be locked.
 */
void
vm_page_deactivate(
    vm_page_t   m)
{
    vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
    vm_page_t   m,
    boolean_t   clear_hw_reference)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

//  dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);  /* (TEST/DEBUG) */
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    /*
     *  This page is no longer very interesting.  If it was
     *  interesting (active or inactive/referenced), then we
     *  clear the reference bit and (re)enter it in the
     *  inactive queue.  Note wired pages should not have
     *  their reference bit cleared.
     */
    assert ( !(m->absent && !m->unusual));

    if (m->gobbled) {       /* can this happen? */
        assert( !VM_PAGE_WIRED(m));

        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
        return;

    if (!m->absent && clear_hw_reference == TRUE)
        pmap_clear_reference(m->phys_page);

    m->reference = FALSE;
    m->no_cache = FALSE;

    if (!m->inactive) {
        VM_PAGE_QUEUES_REMOVE(m);

        if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
            m->dirty && m->object->internal &&
            (m->object->purgable == VM_PURGABLE_DENY ||
             m->object->purgable == VM_PURGABLE_NONVOLATILE ||
             m->object->purgable == VM_PURGABLE_VOLATILE)) {
            queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
            m->throttled = TRUE;
            vm_page_throttled_count++;
        } else {
            if (m->object->named && m->object->ref_count == 1) {
                vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
                vm_page_speculative_recreated++;
#endif
            } else {
                VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
            }
        }
    }
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    if (m->gobbled) {
        assert( !VM_PAGE_WIRED(m));
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
        return;

    VM_PAGE_QUEUES_REMOVE(m);

    queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
    m->clean_queue = TRUE;
    vm_page_cleaned_count++;

    m->inactive = TRUE;
    vm_page_inactive_count++;
    if (m->object->internal) {
        vm_page_pageable_internal_count++;
    } else {
        vm_page_pageable_external_count++;
    }

    vm_pageout_enqueued_cleaned++;
}
/*
 *  vm_page_activate:
 *
 *  Put the specified page on the active list (if appropriate).
 *
 *  The page queues must be locked.
 */

void
vm_page_activate(
    register vm_page_t  m)
{
    VM_PAGE_CHECK(m);
#ifdef  FIXME_4778297
    assert(m->object != kernel_object);
#endif
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    if (m->gobbled) {
        assert( !VM_PAGE_WIRED(m));
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
        return;

#if DEBUG
    if (m->active)
        panic("vm_page_activate: already active");
#endif

    if (m->speculative) {
        DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
        DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
    }

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {

        if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
            m->dirty && m->object->internal &&
            (m->object->purgable == VM_PURGABLE_DENY ||
             m->object->purgable == VM_PURGABLE_NONVOLATILE ||
             m->object->purgable == VM_PURGABLE_VOLATILE)) {
            queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
            m->throttled = TRUE;
            vm_page_throttled_count++;
        } else {
            queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
            m->active = TRUE;
            vm_page_active_count++;
            if (m->object->internal) {
                vm_page_pageable_internal_count++;
            } else {
                vm_page_pageable_external_count++;
            }
        }
        m->reference = TRUE;
        m->no_cache = FALSE;
    }
    VM_PAGE_CHECK(m);
}
/*
 *  vm_page_speculate:
 *
 *  Put the specified page on the speculative list (if appropriate).
 *
 *  The page queues must be locked.
 */
void
vm_page_speculate(
    vm_page_t   m,
    boolean_t   new)
{
    struct vm_speculative_age_q *aq;

    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
        return;

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {
        mach_timespec_t ts;
        clock_sec_t     sec;
        clock_nsec_t    nsec;

        clock_get_system_nanotime(&sec, &nsec);
        ts.tv_sec = (unsigned int) sec;
        ts.tv_nsec = nsec;

        if (vm_page_speculative_count == 0) {

            speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
            speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

            aq = &vm_page_queue_speculative[speculative_age_index];

            /*
             * set the timer to begin a new group
             */
            aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
            aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

            ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
        } else {
            aq = &vm_page_queue_speculative[speculative_age_index];

            if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

                speculative_age_index++;

                if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                    speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                if (speculative_age_index == speculative_steal_index) {
                    speculative_steal_index = speculative_age_index + 1;

                    if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                }
                aq = &vm_page_queue_speculative[speculative_age_index];

                if (!queue_empty(&aq->age_q))
                    vm_page_speculate_ageit(aq);

                aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
                aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

                ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
            }
        }
        enqueue_tail(&aq->age_q, &m->pageq);
        m->speculative = TRUE;
        vm_page_speculative_count++;
        if (m->object->internal) {
            vm_page_pageable_internal_count++;
        } else {
            vm_page_pageable_external_count++;
        }

        if (new == TRUE) {
            vm_object_lock_assert_exclusive(m->object);

            m->object->pages_created++;
#if DEVELOPMENT || DEBUG
            vm_page_speculative_created++;
#endif
        }
    }
    VM_PAGE_CHECK(m);
}
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *  The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
    struct vm_speculative_age_q *sq;
    vm_page_t   t;

    sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

    if (queue_empty(&sq->age_q)) {
        sq->age_q.next = aq->age_q.next;
        sq->age_q.prev = aq->age_q.prev;

        t = (vm_page_t)sq->age_q.next;
        t->pageq.prev = &sq->age_q;

        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = &sq->age_q;
    } else {
        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = aq->age_q.next;

        t = (vm_page_t)aq->age_q.next;
        t->pageq.prev = sq->age_q.prev;

        t = (vm_page_t)aq->age_q.prev;
        t->pageq.next = &sq->age_q;

        sq->age_q.prev = aq->age_q.prev;
    }
    queue_init(&aq->age_q);
}
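/*
 * Sketch of the splice performed above (q == queue head, illustrative):
 *
 *   before:  sq: q <-> s1 <-> ... <-> sN      aq: q <-> a1 <-> ... <-> aM
 *   after:   sq: q <-> s1 <-> ... <-> sN <-> a1 <-> ... <-> aM      aq: empty
 *
 * i.e. the aging bin's pages are appended to the AGED bin in order by
 * rewiring the four boundary pageq pointers rather than re-queueing each
 * page individually.
 */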
void
vm_page_lru(
    vm_page_t   m)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
        return;

    m->no_cache = FALSE;

    VM_PAGE_QUEUES_REMOVE(m);

    VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
    vm_page_t   first_throttled, last_throttled;
    vm_page_t   first_active;
    vm_page_t   m;
    int         extra_active_count;
    int         extra_internal_count, extra_external_count;

    if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
        return;

    extra_active_count = 0;
    extra_internal_count = 0;
    extra_external_count = 0;
    vm_page_lock_queues();
    if (! queue_empty(&vm_page_queue_throttled)) {
        /*
         * Switch "throttled" pages to "active".
         */
        queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->throttled);
            assert(!m->active);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));

            extra_active_count++;
            if (m->object->internal) {
                extra_internal_count++;
            } else {
                extra_external_count++;
            }

            m->throttled = FALSE;
            m->active = TRUE;
            VM_PAGE_CHECK(m);
        }

        /*
         * Transfer the entire throttled queue to a regular LRU page queues.
         * We insert it at the head of the active queue, so that these pages
         * get re-evaluated by the LRU algorithm first, since they've been
         * completely out of it until now.
         */
        first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
        last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);
        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
        queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
        printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
        queue_init(&vm_page_queue_throttled);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += extra_active_count;
        vm_page_pageable_internal_count += extra_internal_count;
        vm_page_pageable_external_count += extra_external_count;
        vm_page_throttled_count = 0;
    }
    assert(vm_page_throttled_count == 0);
    assert(queue_empty(&vm_page_queue_throttled));
    vm_page_unlock_queues();
}
/*
 * move pages from the indicated local queue to the global active queue
 * it's OK to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
    struct vpl  *lq;
    vm_page_t   first_local, last_local;
    vm_page_t   first_active;
    vm_page_t   m;
    uint32_t    count = 0;

    if (vm_page_local_q == NULL)
        return;

    lq = &vm_page_local_q[lid].vpl_un.vpl;

    if (nolocks == FALSE) {
        if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
            if ( !vm_page_trylockspin_queues())
                return;
        } else
            vm_page_lockspin_queues();

        VPL_LOCK(&lq->vpl_lock);
    }
    if (lq->vpl_count) {
        /*
         * Switch "local" pages to "active".
         */
        assert(!queue_empty(&lq->vpl_queue));

        queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->local);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));
            assert(!m->throttled);
            assert(!m->fictitious);

            if (m->local_id != lid)
                panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

            m->local_id = 0;
            m->local = FALSE;
            m->active = TRUE;
            VM_PAGE_CHECK(m);

            count++;
        }
        if (count != lq->vpl_count)
            panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

        /*
         * Transfer the entire local queue to a regular LRU page queues.
         */
        first_local = (vm_page_t) queue_first(&lq->vpl_queue);
        last_local = (vm_page_t) queue_last(&lq->vpl_queue);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);

        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
        queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_local->pageq) = (queue_entry_t) first_active;

        queue_init(&lq->vpl_queue);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += lq->vpl_count;
        vm_page_pageable_internal_count += lq->vpl_internal_count;
        vm_page_pageable_external_count += lq->vpl_external_count;
        lq->vpl_count = 0;
        lq->vpl_internal_count = 0;
        lq->vpl_external_count = 0;
    }
    assert(queue_empty(&lq->vpl_queue));

    if (nolocks == FALSE) {
        VPL_UNLOCK(&lq->vpl_lock);
        vm_page_unlock_queues();
    }
}
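/*
 * Each per-cpu "local" queue (vm_page_local_q[lid].vpl_un.vpl) collects
 * newly activated pages so that the global active-queue lock is not
 * taken on every activation; this routine drains one of those queues
 * back into the global active list, either when it crosses its hard
 * limit (force) or, with nolocks == TRUE on the hibernate path, where
 * the caller is assumed to have made the locks unnecessary.
 */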
/*
 *  vm_page_part_zero_fill:
 *
 *  Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
    vm_page_t   m,
    vm_offset_t m_pa,
    vm_size_t   len)
{

#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
    pmap_zero_part_page(m->phys_page, m_pa, len);
#else
    vm_page_t   tmp;

    while (1) {
        tmp = vm_page_grab();
        if (tmp == VM_PAGE_NULL) {
            vm_page_wait(THREAD_UNINT);
            continue;
        }
        break;
    }
    vm_page_zero_fill(tmp);
    if (m_pa != 0) {
        vm_page_part_copy(m, 0, tmp, 0, m_pa);
    }
    if ((m_pa + len) < PAGE_SIZE) {
        vm_page_part_copy(m, m_pa + len, tmp,
                          m_pa + len, PAGE_SIZE - (m_pa + len));
    }
    vm_page_copy(tmp, m);
    VM_PAGE_FREE(tmp);
#endif

}
/*
 *  vm_page_zero_fill:
 *
 *  Zero-fill the specified page.
 */
void
vm_page_zero_fill(
    vm_page_t   m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
        m->object, m->offset, m, 0,0);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(m);
#endif

//  dbgTrace(0xAEAEAEAE, m->phys_page, 0);  /* (BRINGUP) */
    pmap_zero_page(m->phys_page);
}
/*
 *  vm_page_part_copy:
 *
 *  copy part of one page to another
 */

void
vm_page_part_copy(
    vm_page_t   src_m,
    vm_offset_t src_pa,
    vm_page_t   dst_m,
    vm_offset_t dst_pa,
    vm_size_t   len)
{
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dst_m);
#endif
    pmap_copy_part_page(src_m->phys_page, src_pa,
                        dst_m->phys_page, dst_pa, len);
}
/*
 *  vm_page_copy:
 *
 *  Copy one page to another
 *
 * ENCRYPTED SWAP:
 * The source page should not be encrypted.  The caller should
 * make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
    vm_page_t   src_m,
    vm_page_t   dest_m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
        src_m->object, src_m->offset,
        dest_m->object, dest_m->offset,
        0);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dest_m);
#endif
    vm_object_lock_assert_held(src_m->object);

    /*
     * ENCRYPTED SWAP:
     * The source page should not be encrypted at this point.
     * The destination page will therefore not contain encrypted
     * data after the copy.
     */
    if (src_m->encrypted) {
        panic("vm_page_copy: source page %p is encrypted\n", src_m);
    }
    dest_m->encrypted = FALSE;

    if (src_m->object != VM_OBJECT_NULL &&
        src_m->object->code_signed) {
        /*
         * We're copying a page from a code-signed object.
         * Whoever ends up mapping the copy page might care about
         * the original page's integrity, so let's validate the
         * source page now.
         */
        vm_page_copy_cs_validations++;
        vm_page_validate_cs(src_m);
    }

    if (vm_page_is_slideable(src_m)) {
        boolean_t was_busy = src_m->busy;
        src_m->busy = TRUE;
        (void) vm_page_slide(src_m, 0);
        assert(src_m->busy);
        if (!was_busy) {
            PAGE_WAKEUP_DONE(src_m);
        }
    }

    /*
     * Propagate the cs_tainted bit to the copy page. Do not propagate
     * the cs_validated bit.
     */
    dest_m->cs_tainted = src_m->cs_tainted;
    if (dest_m->cs_tainted) {
        vm_page_copy_cs_tainted++;
    }
    dest_m->slid = src_m->slid;
    dest_m->error = src_m->error; /* sliding src_m might have failed... */
    pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
#if MACH_ASSERT
static void
_vm_page_print(
    vm_page_t   p)
{
    printf("vm_page %p: \n", p);
    printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
    printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
    printf("  next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
    printf("  object=%p offset=0x%llx\n", p->object, p->offset);
    printf("  wire_count=%u\n", p->wire_count);

    printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
           (p->local ? "" : "!"),
           (p->inactive ? "" : "!"),
           (p->active ? "" : "!"),
           (p->pageout_queue ? "" : "!"),
           (p->speculative ? "" : "!"),
           (p->laundry ? "" : "!"));
    printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
           (p->free ? "" : "!"),
           (p->reference ? "" : "!"),
           (p->gobbled ? "" : "!"),
           (p->private ? "" : "!"),
           (p->throttled ? "" : "!"));
    printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
           (p->busy ? "" : "!"),
           (p->wanted ? "" : "!"),
           (p->tabled ? "" : "!"),
           (p->fictitious ? "" : "!"),
           (p->pmapped ? "" : "!"),
           (p->wpmapped ? "" : "!"));
    printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
           (p->pageout ? "" : "!"),
           (p->absent ? "" : "!"),
           (p->error ? "" : "!"),
           (p->dirty ? "" : "!"),
           (p->cleaning ? "" : "!"),
           (p->precious ? "" : "!"),
           (p->clustered ? "" : "!"));
    printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
           (p->overwriting ? "" : "!"),
           (p->restart ? "" : "!"),
           (p->unusual ? "" : "!"),
           (p->encrypted ? "" : "!"),
           (p->encrypted_cleaning ? "" : "!"));
    printf("  %scs_validated, %scs_tainted, %sno_cache\n",
           (p->cs_validated ? "" : "!"),
           (p->cs_tainted ? "" : "!"),
           (p->no_cache ? "" : "!"));

    printf("phys_page=0x%x\n", p->phys_page);
}
/*
 *  Check that the list of pages is ordered by
 *  ascending physical address and has no holes.
 */
static unsigned int
vm_page_verify_contiguous(
    vm_page_t       pages,
    unsigned int    npages)
{
    register vm_page_t  m;
    unsigned int        page_count;
    vm_offset_t         prev_addr;

    prev_addr = pages->phys_page;
    page_count = 1;
    for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
        if (m->phys_page != prev_addr + 1) {
            printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
                   m, (long)prev_addr, m->phys_page);
            printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
            panic("vm_page_verify_contiguous: not contiguous!");
        }
        prev_addr = m->phys_page;
        ++page_count;
    }
    if (page_count != npages) {
        printf("pages %p actual count 0x%x but requested 0x%x\n",
               pages, page_count, npages);
        panic("vm_page_verify_contiguous: count error");
    }
    return 1;
}
/*
 *  Check the free lists for proper length etc.
 */
static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
static unsigned int
vm_page_verify_free_list(
    queue_head_t    *vm_page_queue,
    unsigned int    color,
    vm_page_t       look_for_page,
    boolean_t       expect_page)
{
    unsigned int    npages;
    vm_page_t       m;
    vm_page_t       prev_m;
    boolean_t       found_page;

    if (! vm_page_verify_this_free_list_enabled)
        return 0;

    found_page = FALSE;
    npages = 0;
    prev_m = (vm_page_t) vm_page_queue;
    queue_iterate(vm_page_queue,
                  m,
                  vm_page_t,
                  pageq) {

        if (m == look_for_page) {
            found_page = TRUE;
        }
        if ((vm_page_t) m->pageq.prev != prev_m)
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
                  color, npages, m, m->pageq.prev, prev_m);
        if ( !m->busy )
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
                  color, npages, m);
        if (color != (unsigned int) -1) {
            if ((m->phys_page & vm_color_mask) != color)
                panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
                      color, npages, m, m->phys_page & vm_color_mask, color);
            if ( !m->free )
                panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
                      color, npages, m);
        }
        ++npages;
        prev_m = m;
    }
    if (look_for_page != VM_PAGE_NULL) {
        unsigned int other_color;

        if (expect_page && !found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
            _vm_page_print(look_for_page);
            for (other_color = 0;
                 other_color < vm_colors;
                 other_color++) {
                if (other_color == color)
                    continue;
                vm_page_verify_free_list(&vm_page_queue_free[other_color],
                                         other_color, look_for_page, FALSE);
            }
            if (color == (unsigned int) -1) {
                vm_page_verify_free_list(&vm_lopage_queue_free,
                                         (unsigned int) -1, look_for_page, FALSE);
            }
            panic("vm_page_verify_free_list(color=%u)\n", color);
        }
        if (!expect_page && found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
        }
    }
    return npages;
}
static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
    unsigned int    color, npages, nlopages;
    boolean_t       toggle = TRUE;

    if (! vm_page_verify_all_free_lists_enabled)
        return;

    npages = 0;

    lck_mtx_lock(&vm_page_queue_free_lock);

    if (vm_page_verify_this_free_list_enabled == TRUE) {
        /*
         * This variable has been set globally for extra checking of
         * each free list Q. Since we didn't set it, we don't own it
         * and we shouldn't toggle it.
         */
        toggle = FALSE;
    }

    if (toggle == TRUE) {
        vm_page_verify_this_free_list_enabled = TRUE;
    }

    for( color = 0; color < vm_colors; color++ ) {
        npages += vm_page_verify_free_list(&vm_page_queue_free[color],
                                           color, VM_PAGE_NULL, FALSE);
    }
    nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
                                        (unsigned int) -1,
                                        VM_PAGE_NULL, FALSE);
    if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
        panic("vm_page_verify_free_lists: "
              "npages %u free_count %d nlopages %u lo_free_count %u",
              npages, vm_page_free_count, nlopages, vm_lopage_free_count);

    if (toggle == TRUE) {
        vm_page_verify_this_free_list_enabled = FALSE;
    }

    lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
    vm_page_t   mem,
    int         val)
{
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if (mem->free + mem->active + mem->inactive + mem->speculative +
        mem->throttled + mem->pageout_queue > (val)) {
        _vm_page_print(mem);
        panic("vm_page_queues_assert(%p, %d)\n", mem, val);
    }
    if (VM_PAGE_WIRED(mem)) {
        assert(!mem->active);
        assert(!mem->inactive);
        assert(!mem->speculative);
        assert(!mem->throttled);
        assert(!mem->pageout_queue);
    }
}
#endif  /* MACH_ASSERT */
/*
 *  CONTIGUOUS PAGE ALLOCATION
 *
 *  Find a region large enough to contain at least n pages
 *  of contiguous physical memory.
 *
 *  This is done by traversing the vm_page_t array in a linear fashion.
 *  We assume that the vm_page_t array has the available physical pages in an
 *  ordered, ascending list... this is currently true of all our implementations
 *  and must remain so... there can be 'holes' in the array... we also can
 *  no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *  which used to happen via 'vm_page_convert'... that function was no longer
 *  being called and was removed...
 *
 *  The basic flow consists of stabilizing some of the interesting state of
 *  a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *  sweep at the beginning of the array looking for pages that meet our criteria
 *  for a 'stealable' page... currently we are pretty conservative... if the page
 *  meets these criteria and is physically contiguous to the previous page in the 'run'
 *  we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *  and start to develop a new run... if at this point we've already considered
 *  at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *  and mutex_pause (which will yield the processor), to keep the latency low w/r
 *  to other threads trying to acquire free pages (or move pages from q to q),
 *  and then continue from the spot we left off... we only make 1 pass through the
 *  array.  Once we have a 'run' that is long enough, we'll go into the loop
 *  which steals the pages from the queues they're currently on... pages on the free
 *  queue can be stolen directly... pages that are on any of the other queues
 *  must be removed from the object they are tabled on... this requires taking the
 *  object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *  or if the state of the page behind the vm_object lock is no longer viable, we'll
 *  dump the pages we've currently stolen back to the free list, and pick up our
 *  scan from the point where we aborted the 'current' run.
 *
 *  Requirements:
 *      - neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *  Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define MAX_CONSIDERED_BEFORE_YIELD     1000


#define RESET_STATE_OF_RUN()    \
    MACRO_BEGIN                 \
    prevcontaddr = -2;          \
    start_pnum = -1;            \
    free_considered = 0;        \
    substitute_needed = 0;      \
    npages = 0;                 \
    MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
#if DEBUG
int vm_page_find_contig_debug = 0;
#endif
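/*
 * RESET_STATE_OF_RUN() (above) is what makes the scan restartable: it
 * clears the bookkeeping for the current candidate run (the previous
 * contiguous physical address, the count of free pages considered, the
 * count of pages that would need substitution, and the run length) so
 * that the next page examined starts a fresh run.  A worked example of
 * the alignment check this supports: with pnum_mask == 3 (a 4-page
 * alignment request), a run may only begin on a page whose phys_page
 * has its two low bits clear.
 */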
static vm_page_t
vm_page_find_contiguous(
    unsigned int    contig_pages,
    ppnum_t         max_pnum,
    ppnum_t         pnum_mask,
    boolean_t       wire,
    int             flags)
{
    vm_page_t       m = NULL;
    ppnum_t         prevcontaddr;
    ppnum_t         start_pnum = 0;
    unsigned int    npages, considered, scanned;
    unsigned int    page_idx, start_idx, last_idx, orig_last_idx;
    unsigned int    idx_last_contig_page_found = 0;
    int             free_considered, free_available;
    int             substitute_needed;
    boolean_t       wrapped;
#if DEBUG
    clock_sec_t     tv_start_sec, tv_end_sec;
    clock_usec_t    tv_start_usec, tv_end_usec;
#endif
#if MACH_ASSERT
    int             yielded = 0;
    int             dumped_run = 0;
    int             stolen_pages = 0;
    int             compressed_pages = 0;
#endif

    if (contig_pages == 0)
        return VM_PAGE_NULL;

#if MACH_ASSERT
    vm_page_verify_free_lists();
#endif
#if DEBUG
    clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
#endif
    PAGE_REPLACEMENT_ALLOWED(TRUE);

    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);

    RESET_STATE_OF_RUN();

    scanned = 0;
    considered = 0;
    free_available = vm_page_free_count - vm_page_free_reserved;

    wrapped = FALSE;

    if(flags & KMA_LOMEM)
        idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
    else
        idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

    orig_last_idx = idx_last_contig_page_found;
    last_idx = orig_last_idx;

    for (page_idx = last_idx, start_idx = last_idx;
         npages < contig_pages && page_idx < vm_pages_count;
         page_idx++) {
retry:
        if (wrapped &&
            npages == 0 &&
            page_idx >= orig_last_idx) {
            /*
             * We're back where we started and we haven't
             * found any suitable contiguous range.  Let's
             * give up.
             */
            break;
        }
        scanned++;
        m = &vm_pages[page_idx];

        assert(!m->fictitious);
        assert(!m->private);

        if (max_pnum && m->phys_page > max_pnum) {
            /* no more low pages... */
            break;
        }
        if (!npages & ((m->phys_page & pnum_mask) != 0)) {
            /*
             * not aligned
             */
            RESET_STATE_OF_RUN();

        } else if (VM_PAGE_WIRED(m) || m->gobbled ||
                   m->encrypted_cleaning ||
                   m->pageout_queue || m->laundry || m->wanted ||
                   m->cleaning || m->overwriting || m->pageout) {
            /*
             * page is in a transient state
             * or a state we don't want to deal
             * with, so don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
            /*
             * page needs to be on one of our queues
             * or it needs to belong to the compressor pool
             * in order for it to be stable behind the
             * locks we hold at this point...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && (!m->tabled || m->busy)) {
            /*
             * pages on the free list are always 'busy'
             * so we couldn't test for 'busy' in the check
             * for the transient states... pages that are
             * 'free' are never 'tabled', so we also couldn't
             * test for 'tabled'.  So we check here to make
             * sure that a non-free page is not busy and is
             * tabled on an object...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else {
            if (m->phys_page != prevcontaddr + 1) {
                if ((m->phys_page & pnum_mask) != 0) {
                    RESET_STATE_OF_RUN();
                    goto did_consider;
                } else {
                    npages = 1;
                    start_idx = page_idx;
                    start_pnum = m->phys_page;
                }
            } else {
                npages++;
            }
            prevcontaddr = m->phys_page;

            VM_PAGE_CHECK(m);
            if (m->free) {
                free_considered++;
            } else {
                /*
                 * This page is not free.
                 * If we can't steal used pages,
                 * we have to give up this run
                 * and keep looking.
                 * Otherwise, we might need to
                 * move the contents of this page
                 * into a substitute page.
                 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
                if (m->pmapped || m->dirty || m->precious) {
                    substitute_needed++;
                }
#else
                RESET_STATE_OF_RUN();
#endif
            }

            if ((free_considered + substitute_needed) > free_available) {
                /*
                 * if we let this run continue
                 * we will end up dropping the vm_page_free_count
                 * below the reserve limit... we need to abort
                 * this run, but we can at least re-consider this
                 * page... thus the jump back to 'retry'
                 */
                RESET_STATE_OF_RUN();

                if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
                    considered++;
                    goto retry;
                }
                /*
                 * free_available == 0
                 * so can't consider any free pages... if
                 * we went to retry in this case, we'd
                 * get stuck looking at the same page
                 * w/o making any forward progress
                 * we also want to take this path if we've already
                 * reached our limit that controls the lock latency
                 */
            }
        }
did_consider:
        if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

            PAGE_REPLACEMENT_ALLOWED(FALSE);

            lck_mtx_unlock(&vm_page_queue_free_lock);
            vm_page_unlock_queues();

            mutex_pause(0);

            PAGE_REPLACEMENT_ALLOWED(TRUE);

            vm_page_lock_queues();
            lck_mtx_lock(&vm_page_queue_free_lock);

            RESET_STATE_OF_RUN();
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;
            considered = 0;
#if MACH_ASSERT
            yielded++;
#endif
            goto retry;
        }
        considered++;
    }
    m = VM_PAGE_NULL;

    if (npages != contig_pages) {

        if (!wrapped) {
            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();
            if( flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
            last_idx = 0;
            page_idx = last_idx;
            wrapped = TRUE;
            goto retry;
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
        vm_page_t       m1;
        vm_page_t       m2;
        unsigned int    cur_idx;
        unsigned int    tmp_start_idx;
        vm_object_t     locked_object = VM_OBJECT_NULL;
        boolean_t       abort_run = FALSE;

        assert(page_idx - start_idx == contig_pages);

        tmp_start_idx = start_idx;

        /*
         * first pass through to pull the free pages
         * off of the free queue so that in case we
         * need substitute pages, we won't grab any
         * of the free pages in the run... we'll clear
         * the 'free' bit in the 2nd pass, and even in
         * an abort_run case, we'll collect all of the
         * free pages in this run and return them to the free list
         */
        while (start_idx < page_idx) {

            m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
            assert(m1->free);
#endif
            if (m1->free) {
                unsigned int color;

                color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
                vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
#endif
                queue_remove(&vm_page_queue_free[color],
                             m1,
                             vm_page_t,
                             pageq);
                m1->pageq.next = NULL;
                m1->pageq.prev = NULL;
#if MACH_ASSERT
                vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
#endif
                /*
                 * Clear the "free" bit so that this page
                 * does not get considered for another
                 * concurrent physically-contiguous allocation.
                 */
                m1->free = FALSE;

                vm_page_free_count--;
            }
        }
        if( flags & KMA_LOMEM)
            vm_page_lomem_find_contiguous_last_idx = page_idx;
        else
            vm_page_find_contiguous_last_idx = page_idx;

        /*
         * we can drop the free queue lock at this point since
         * we've pulled any 'free' candidates off of the list
         * we need it dropped so that we can do a vm_page_grab
         * when substituting for pmapped/dirty pages
         */
        lck_mtx_unlock(&vm_page_queue_free_lock);

        start_idx = tmp_start_idx;
        cur_idx = page_idx - 1;

        while (start_idx++ < page_idx) {
            /*
             * must go through the list from back to front
             * so that the page list is created in the
             * correct order - low -> high phys addresses
             */
            m1 = &vm_pages[cur_idx--];

            if (m1->object == VM_OBJECT_NULL) {
                /*
                 * page has already been removed from
                 * the free list in the 1st pass
                 */
                assert(m1->offset == (vm_object_offset_t) -1);
                assert(!m1->wanted);
                assert(!m1->laundry);
            } else {
                vm_object_t object;
                int         refmod;
                boolean_t   disconnected, reusable;

                if (abort_run == TRUE)
                    continue;

                object = m1->object;

                if (object != locked_object) {
                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    if (vm_object_lock_try(object))
                        locked_object = object;
                }
                if (locked_object == VM_OBJECT_NULL ||
                    (VM_PAGE_WIRED(m1) || m1->gobbled ||
                     m1->encrypted_cleaning ||
                     m1->pageout_queue || m1->laundry || m1->wanted ||
                     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {

                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    tmp_start_idx = cur_idx;
                    abort_run = TRUE;
                    continue;
                }

                disconnected = FALSE;
                reusable = FALSE;

                if ((m1->reusable ||
                     m1->object->all_reusable) &&
                    m1->inactive &&
                    !m1->dirty &&
                    !m1->reference) {
                    /* reusable page... */
                    refmod = pmap_disconnect(m1->phys_page);
                    disconnected = TRUE;
                    if (refmod == 0) {
                        /*
                         * ... not reused: can steal
                         * without relocating contents.
                         */
                        reusable = TRUE;
                    }
                }

                if ((m1->pmapped &&
                     ! reusable) ||
                    m1->dirty ||
                    m1->precious) {
                    vm_object_offset_t offset;

                    m2 = vm_page_grab();

                    if (m2 == VM_PAGE_NULL) {
                        if (locked_object) {
                            vm_object_unlock(locked_object);
                            locked_object = VM_OBJECT_NULL;
                        }
                        tmp_start_idx = cur_idx;
                        abort_run = TRUE;
                        continue;
                    }
                    if (! disconnected) {
                        if (m1->pmapped)
                            refmod = pmap_disconnect(m1->phys_page);
                        else
                            refmod = 0;
                    }

                    /* copy the page's contents */
                    pmap_copy_page(m1->phys_page, m2->phys_page);
                    /* copy the page's state */
                    assert(!VM_PAGE_WIRED(m1));
                    assert(!m1->pageout_queue);
                    assert(!m1->laundry);
                    m2->reference   = m1->reference;
                    assert(!m1->gobbled);
                    assert(!m1->private);
                    m2->no_cache    = m1->no_cache;
                    assert(!m1->wanted);
                    assert(!m1->fictitious);
                    m2->pmapped     = m1->pmapped; /* should flush cache ? */
                    m2->wpmapped    = m1->wpmapped;
                    assert(!m1->pageout);
                    m2->absent      = m1->absent;
                    m2->error       = m1->error;
                    m2->dirty       = m1->dirty;
                    assert(!m1->cleaning);
                    m2->precious    = m1->precious;
                    m2->clustered   = m1->clustered;
                    assert(!m1->overwriting);
                    m2->restart     = m1->restart;
                    m2->unusual     = m1->unusual;
                    m2->encrypted   = m1->encrypted;
                    assert(!m1->encrypted_cleaning);
                    m2->cs_validated = m1->cs_validated;
                    m2->cs_tainted  = m1->cs_tainted;

                    /*
                     * If m1 had really been reusable,
                     * we would have just stolen it, so
                     * let's not propagate its "reusable"
                     * bit and assert that m2 is not
                     * marked as "reusable".
                     */
                    // m2->reusable = m1->reusable;
                    assert(!m2->reusable);

                    assert(!m1->lopage);
                    m2->slid        = m1->slid;
                    m2->compressor  = m1->compressor;

                    /*
                     * page may need to be flushed if
                     * it is marshalled into a UPL
                     * that is going to be used by a device
                     * that doesn't support coherency
                     */
                    m2->written_by_kernel = TRUE;

                    /*
                     * make sure we clear the ref/mod state
                     * from the pmap layer... else we risk
                     * inheriting state from the last time
                     * this page was used...
                     */
                    pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

                    if (refmod & VM_MEM_REFERENCED)
                        m2->reference = TRUE;
                    if (refmod & VM_MEM_MODIFIED) {
                        SET_PAGE_DIRTY(m2, TRUE);
                    }
                    offset = m1->offset;

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);

                    /*
                     * now put the substitute page
                     * on the object
                     */
                    vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);

                    if (m2->compressor) {
                        m2->pmapped = TRUE;
                        m2->wpmapped = TRUE;

                        PMAP_ENTER(kernel_pmap, m2->offset, m2,
                                   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
#if MACH_ASSERT
                        compressed_pages++;
#endif
                    } else {
                        if (m2->reference)
                            vm_page_activate(m2);
                        else
                            vm_page_deactivate(m2);
                    }
                    PAGE_WAKEUP_DONE(m2);

                } else {
                    assert(!m1->compressor);

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);
                }
#if MACH_ASSERT
                stolen_pages++;
#endif
            }
            m1->pageq.next = (queue_entry_t) m;
            m1->pageq.prev = NULL;
            m = m1;
        }
        if (locked_object) {
            vm_object_unlock(locked_object);
            locked_object = VM_OBJECT_NULL;
        }

        if (abort_run == TRUE) {
            if (m != VM_PAGE_NULL) {
                vm_page_free_list(m, FALSE);
            }
#if MACH_ASSERT
            dumped_run++;
#endif
            /*
             * want the index of the last
             * page in this run that was
             * successfully 'stolen', so back
             * it up 1 for the auto-decrement on use
             * and 1 more to bump back over this page
             */
            page_idx = tmp_start_idx + 2;
            if (page_idx >= vm_pages_count) {
                page_idx = last_idx = 0;
                wrapped = TRUE;
            }
            abort_run = FALSE;

            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();

            if( flags & KMA_LOMEM
)
4624 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
4626 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
4628 last_idx
= page_idx
;
4630 lck_mtx_lock(&vm_page_queue_free_lock
);
4632 * reset our free page limit since we
4633 * dropped the lock protecting the vm_page_free_queue
4635 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4639 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
4647 vm_page_gobble_count
+= npages
;
4650 * gobbled pages are also counted as wired pages
4652 vm_page_wire_count
+= npages
;
4654 assert(vm_page_verify_contiguous(m
, npages
));
4657 PAGE_REPLACEMENT_ALLOWED(FALSE
);
4659 vm_page_unlock_queues();
4662 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
4664 tv_end_sec
-= tv_start_sec
;
4665 if (tv_end_usec
< tv_start_usec
) {
4667 tv_end_usec
+= 1000000;
4669 tv_end_usec
-= tv_start_usec
;
4670 if (tv_end_usec
>= 1000000) {
4672 tv_end_sec
-= 1000000;
4674 if (vm_page_find_contig_debug
) {
4675 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4676 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
4677 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
4678 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
);
4683 vm_page_verify_free_lists();
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
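/*
 * Usage note (descriptive, not part of the original sources): cpm_allocate()
 * expects a page-aligned size and treats the inability to find a physically
 * contiguous run as a soft failure (KERN_NO_SPACE).  The pages handed back
 * through *list are already wired (or gobbled, when wire == FALSE) and sorted
 * by ascending physical address, which is what device drivers and the
 * contiguous kmem allocator typically rely on.
 */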
unsigned int	vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original code
 */
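/*
 * Illustrative caller sketch (not lifted verbatim from any one call site;
 * the real users live in vm_pageout.c and vm_object.c): work is batched into
 * a stack-allocated vm_page_delayed_work array while only the object lock is
 * held, e.g.
 *
 *	dwp->dw_mask = DW_vm_page_free | DW_clear_busy;
 *	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 *	if (dw_count >= dw_limit) {
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 *
 * so the hot vm_page_queue_lock is only taken once per batch, below.
 */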
void
vm_page_do_delayed_work(
	vm_object_t	object,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
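/*
 * vm_page_alloc_list(), below, is only meaningful for KMA_LOMEM requests:
 * it pulls page_count pages from the low-memory pool one at a time via
 * vm_page_grablo() and strings them together through pageq.next; on a
 * shortage everything grabbed so far is returned to the free list and
 * KERN_RESOURCE_SHORTAGE is reported back to the caller.
 */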
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;

/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define	HIBERNATE_XPMAPPED_LIMIT	40000
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}


boolean_t hibernate_skip_external = FALSE;
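/*
 * hibernate_flush_queue() walks up to qcount pages of one paging queue and
 * pushes any dirty, flushable page to its pager (or, with the compressed
 * pager active, through the compressor) so the hibernation image only has to
 * preserve clean memory; pages it cannot or should not clean are re-entered
 * on the queue and counted in hibernate_stats for the HIBPRINT summary.
 */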
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
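/*
 * The pass argument above only matters with the compressed pager: pass == 1
 * brackets the active-queue flush with vm_compressor_record_warmup_start()/
 * vm_compressor_record_warmup_end() so the compressor can note which segments
 * were populated during the pre-hibernation sweep.  Any non-zero return from
 * the flush/drain helpers aborts the whole sweep and is reported back to
 * hibernate_flush_memory().
 */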
void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}
int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

			vm_compressor_flush();

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
		}
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		hibernate_stats.hibernate_considered,
		hibernate_stats.hibernate_reentered_on_q,
		hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		hibernate_stats.hibernate_skipped_cleaning,
		hibernate_stats.hibernate_skipped_transient,
		hibernate_stats.hibernate_skipped_precious,
		hibernate_stats.hibernate_skipped_external,
		hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		hibernate_stats.hibernate_queue_paused,
		hibernate_stats.hibernate_throttled,
		hibernate_stats.hibernate_throttle_timeout,
		hibernate_stats.hibernate_drained,
		hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
static void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
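/*
 * Worked example for the masking above: a bank covering 37 pages needs two
 * 32-bit bitmap words; (37 & 31) == 5, so only the 5 most-significant bits of
 * the final word correspond to real pages (the hibernate bitmaps are filled
 * MSB-first).  0xFFFFFFFF >> 5 == 0x07FFFFFF pre-sets the 27 out-of-range
 * bits, and since a set bit means "does not need saving", the image writer
 * never tries to preserve pages past the end of the bank.
 */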
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t i;
	vm_page_t m;
	uint64_t start, end, timeout, nsec;
	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}
void
hibernate_free_gobble_pages(void)
{
	vm_page_t m, next;
	uint32_t  count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
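/*
 * hibernate_consider_discard() decides, for one resident page, whether the
 * hibernation image can safely skip it: clean pages and pages of volatile or
 * empty purgeable objects can be re-created or dropped on wake, so they are
 * reported as discardable; anything wired, busy, precious, dirty, or
 * otherwise "in play" must be preserved.  The preflight flag suppresses the
 * statistics side effects while the image size is still being estimated.
 */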
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
	vm_object_t object = NULL;
	int			refmod_state;
	boolean_t		discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			if (!preflight) hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			if (!preflight) hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			if (!preflight) hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			if (!preflight) hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			if (!preflight) hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			if (!preflight) hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			if (!preflight) hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);


		if (discard == FALSE) {
			if (!preflight)
				hibernate_stats.cd_found_dirty++;
		} else if (m->xpmapped && m->reference && !object->internal) {
			if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
				if (!preflight)
					hibernate_stats.cd_found_xpmapped++;
				discard = FALSE;
			} else {
				if (!preflight)
					hibernate_stats.cd_skipped_xpmapped++;
			}
		}
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
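/*
 * hibernate_discard_page() actually drops a page that the check above judged
 * discardable: it is disconnected from the pmap and freed, and if it belonged
 * to a volatile purgeable object the whole object is moved to the EMPTY state,
 * with the purgeable token and vm_page_purgeable_count adjusted to match.
 */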
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

#if MACH_ASSERT || DEBUG
	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */
#endif	/* MACH_ASSERT || DEBUG */

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		assert(old_queue);
		if (m->object->purgeable_when_ripe) {
			vm_purgeable_token_delete_first(old_queue);
		}
		m->object->purgable = VM_PURGABLE_EMPTY;

		/*
		 * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
		 * accounted in the "volatile" ledger, so no change here.
		 * We have to update vm_page_purgeable_count, though, since we're
		 * effectively purging this object.
		 */
		unsigned int delta;
		assert(m->object->resident_page_count >= m->object->wired_page_count);
		delta = (m->object->resident_page_count - m->object->wired_page_count);
		assert(vm_page_purgeable_count >= delta);
		OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
	}

	vm_page_free(m);

#if MACH_ASSERT || DEBUG
	vm_object_unlock(object);
#endif	/* MACH_ASSERT || DEBUG */
}
/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
}

void
hibernate_vm_unlock_queues(void)
{
	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
}
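/*
 * Note the lock order used above: the compressor object, then the page
 * queues, then the free-page lock, then each per-CPU local queue.  The
 * unlock routine releases them in the reverse order, and the preflight path
 * of hibernate_page_list_setall() takes the same three global locks itself
 * so that both paths see a stable view of the page queues.
 */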
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   boolean_t preflight,
			   boolean_t will_discard,
			   uint32_t * pagesOut)
{
	uint64_t start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t pages = page_list->page_count;
	uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
	uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
	uint32_t count_wire = pages;
	uint32_t count_discard_active    = 0;
	uint32_t count_discard_inactive  = 0;
	uint32_t count_discard_cleaned   = 0;
	uint32_t count_discard_purgeable = 0;
	uint32_t count_discard_speculative = 0;
	uint32_t count_discard_vm_struct_pages = 0;
	uint32_t i;
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;
	boolean_t            discard_all;
	boolean_t            discard;

	HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);

	if (preflight) {
		page_list = NULL;
		page_list_wired = NULL;
		page_list_pal = NULL;
		discard_all = FALSE;
	} else {
		discard_all = will_discard;
	}

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		vm_page_lock_queues();
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_LOCK(&lq->vpl_lock);
			}
		}
	}
#endif  /* MACH_ASSERT || DEBUG */


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	if (!preflight) {
		hibernate_page_list_zero(page_list);
		hibernate_page_list_zero(page_list_wired);
		hibernate_page_list_zero(page_list_pal);

		hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
		hibernate_stats.cd_pages = pages;
	}

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, !preflight);
	}

	if (preflight) {
		vm_object_lock(compressor_object);
		vm_page_lock_queues();
		lck_mtx_lock(&vm_page_queue_free_lock);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
		m = (vm_page_t) m->pageq.next;
	}

	if (!preflight) for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			if (!preflight) {
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_total_free++;
			}
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_throttled++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_anonymous++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			discard = discard_all;
		}
		else
			count_cleaned++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			discard = discard_all;
		}
		else
			count_active++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_inactive++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			discard = FALSE;
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m, preflight))
			{
				if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
				discard = discard_all;
			}
			else
				count_speculative++;
			count_wire--;
			if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
			if (discard)	hibernate_discard_page(m);
			m = next;
		}
	}

	queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
	{
		count_compressor++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	if (preflight == FALSE && discard_all == TRUE) {
		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

		HIBLOG("hibernate_teardown started\n");
		count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
		HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

		pages -= count_discard_vm_struct_pages;
		count_wire -= count_discard_vm_struct_pages;

		hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	if (!preflight) {
		// pull wired from hibernate_bitmap
		bitmap = &page_list->bank_bitmap[0];
		bitmap_wired = &page_list_wired->bank_bitmap[0];
		for (bank = 0; bank < page_list->bank_count; bank++)
		{
			for (i = 0; i < bitmap->bitmapwords; i++)
				bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
			bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
			bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
		}
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

	if (!preflight) {
		hibernate_stats.cd_count_wire = count_wire;
		hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
					       count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
		pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
		discard_all ? "did" : "could",
		count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

	if (hibernate_stats.cd_skipped_xpmapped)
		HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

	if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_UNLOCK(&lq->vpl_lock);
			}
		}
		vm_page_unlock_queues();
	}
#endif  /* MACH_ASSERT || DEBUG */

	if (preflight) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		vm_page_unlock_queues();
		vm_object_unlock(compressor_object);
	}

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
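/*
 * Note on the preflight accounting above: when the caller intends to discard
 * aggressively (preflight && will_discard), every pageable page that the real
 * pass would either tear down or push through the compressor is subtracted
 * from *pagesOut as well, so the estimated image size reflects the fully
 * discarded state rather than the current resident set.
 */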
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t  start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t  i;
	uint32_t  count_discard_active    = 0;
	uint32_t  count_discard_inactive  = 0;
	uint32_t  count_discard_purgeable = 0;
	uint32_t  count_discard_cleaned   = 0;
	uint32_t  count_discard_speculative = 0;


#if MACH_ASSERT || DEBUG
	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

#if MACH_ASSERT || DEBUG
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
		nsec / 1000000ULL,
		count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
boolean_t       hibernate_paddr_map_inited = FALSE;
boolean_t       hibernate_rebuild_needed = FALSE;
unsigned int    hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t       hibernate_rebuild_hash_list = NULL;

unsigned int    hibernate_teardown_found_tabled_pages = 0;
unsigned int    hibernate_teardown_found_created_pages = 0;
unsigned int    hibernate_teardown_found_free_pages = 0;
unsigned int    hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;
	unsigned int		ppnm_sindx;
	unsigned int		ppnm_eindx;
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;
void
hibernate_create_paddr_map()
{
	unsigned int	i;
	ppnum_t		next_ppnum_in_run = 0;
	struct ppnum_mapping *ppnm = NULL;

	if (hibernate_paddr_map_inited == FALSE) {

		for (i = 0; i < vm_pages_count; i++) {

			if (ppnm)
				ppnm->ppnm_eindx = i;

			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

				ppnm = kalloc(sizeof(struct ppnum_mapping));

				ppnm->ppnm_next = ppnm_head;
				ppnm_head = ppnm;

				ppnm->ppnm_sindx = i;
				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
			}
			next_ppnum_in_run = vm_pages[i].phys_page + 1;
		}
		ppnm->ppnm_eindx++;

		hibernate_paddr_map_inited = TRUE;
	}
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
	struct ppnum_mapping *ppnm = NULL;

	ppnm = ppnm_last_found;

	if (ppnm) {
		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
			goto done;
	}
	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
			ppnm_last_found = ppnm;
			break;
		}
	}
	if (ppnm == NULL)
		panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
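/*
 * Example of the translation above: for a run descriptor with
 * ppnm_sindx == 100, ppnm_eindx == 164 and ppnm_base_paddr == 0x2000,
 * vm_pages[] index 103 maps to physical page 0x2000 + (103 - 100) == 0x2003.
 * The single-entry ppnm_last_found cache makes the common case, repeated
 * lookups within the same contiguous run, effectively O(1).
 */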
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	addr64_t	saddr_aligned;
	addr64_t	eaddr_aligned;
	addr64_t	addr;
	ppnum_t		paddr;
	unsigned int	mark_as_unneeded_pages = 0;

	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
	eaddr_aligned = eaddr & ~PAGE_MASK_64;

	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

		paddr = pmap_find_phys(kernel_pmap, addr);

		assert(paddr);

		hibernate_page_bitset(page_list,       TRUE, paddr);
		hibernate_page_bitset(page_list_wired, TRUE, paddr);

		mark_as_unneeded_pages++;
	}
	return (mark_as_unneeded_pages);
}
void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;

	assert(mem->hashed);
	assert(mem->object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 * Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
void
hibernate_free_range(int sindx, int eindx)
{
	vm_page_t	mem;
	unsigned int	color;

	while (sindx < eindx) {
		mem = &vm_pages[sindx];

		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;

		sindx++;
	}
}
extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
		mem->next_m = VM_PAGE_PACK_PTR(NULL);

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->hashed)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	if (sindx)
		hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were entered in the hash,
	 * but were not located in the vm_pages arrary... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

		mem->next_m = VM_PAGE_PACK_PTR(NULL);
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem,
				     vm_page_t,
				     pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to it's new home
				 */
				vm_pages[compact_target_indx] = *mem;
				mem->free = TRUE;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}
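/*
 * hibernate_teardown_vm_structs() and hibernate_rebuild_vm_structs() form a
 * matched pair: teardown compacts vm_pages[] and empties the free queues so
 * the tail of the array, the hash buckets and the unneeded pmap range can be
 * excluded from the hibernation image, while the rebuild on wake undoes the
 * compaction using the packed "next_m" back-pointers and the
 * hibernate_teardown_last_valid_compact_indx recorded here.
 */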
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t *info,
	unsigned int count)
{
	unsigned int i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
#if VM_PAGE_BUCKETS_CHECK
void
vm_page_buckets_check(void)
{
	unsigned int i;
	vm_page_t p;
	unsigned int p_hash;
	vm_page_bucket_t *bucket;
	lck_spin_t	*bucket_lock;

	if (!vm_page_buckets_check_ready) {
		return;
	}

#if HIBERNATION
	if (hibernate_rebuild_needed ||
	    hibernate_rebuild_hash_list) {
		panic("BUCKET_CHECK: hibernation in progress: "
		      "rebuild_needed=%d rebuild_hash_list=%p\n",
		      hibernate_rebuild_needed,
		      hibernate_rebuild_hash_list);
	}
#endif /* HIBERNATION */

#if VM_PAGE_FAKE_BUCKETS
	char *cp;
	for (cp = (char *) vm_page_fake_buckets_start;
	     cp < (char *) vm_page_fake_buckets_end;
	     cp++) {
		if (*cp != 0x5a) {
			panic("BUCKET_CHECK: corruption at %p in fake buckets "
			      "[0x%llx:0x%llx]\n",
			      cp,
			      (uint64_t) vm_page_fake_buckets_start,
			      (uint64_t) vm_page_fake_buckets_end);
		}
	}
#endif /* VM_PAGE_FAKE_BUCKETS */

	for (i = 0; i < vm_page_bucket_count; i++) {
		bucket = &vm_page_buckets[i];
		if (!bucket->page_list) {
			continue;
		}

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);
		p = VM_PAGE_UNPACK_PTR(bucket->page_list);
		while (p != VM_PAGE_NULL) {
			if (!p->hashed) {
				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
				      "hash %d in bucket %d at %p "
				      "is not hashed\n",
				      p, p->object, p->offset,
				      p_hash, i, bucket);
			}
			p_hash = vm_page_hash(p->object, p->offset);
			if (p_hash != i) {
				panic("BUCKET_CHECK: corruption in bucket %d "
				      "at %p: page %p object %p offset 0x%llx "
				      "hash %d\n",
				      i, bucket, p, p->object, p->offset,
				      p_hash);
			}
			p = VM_PAGE_UNPACK_PTR(p->next_m);
		}
		lck_spin_unlock(bucket_lock);
	}

//	printf("BUCKET_CHECK: checked buckets\n");
}
#endif /* VM_PAGE_BUCKETS_CHECK */