apple/xnu (xnu-2782.30.5): osfmk/vm/vm_resident.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <kern/ledger.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_init.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
86 #include <kern/misc_protos.h>
87 #include <zone_debug.h>
88 #include <vm/cpm.h>
89 #include <pexpert/pexpert.h>
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94 #include <vm/vm_compressor.h>
95
96 #if CONFIG_PHANTOM_CACHE
97 #include <vm/vm_phantom_cache.h>
98 #endif
99
100 #include <IOKit/IOHibernatePrivate.h>
101
102 #include <sys/kdebug.h>
103
104 boolean_t hibernate_cleaning_in_progress = FALSE;
105 boolean_t vm_page_free_verify = TRUE;
106
107 uint32_t vm_lopage_free_count = 0;
108 uint32_t vm_lopage_free_limit = 0;
109 uint32_t vm_lopage_lowater = 0;
110 boolean_t vm_lopage_refill = FALSE;
111 boolean_t vm_lopage_needed = FALSE;
112
113 lck_mtx_ext_t vm_page_queue_lock_ext;
114 lck_mtx_ext_t vm_page_queue_free_lock_ext;
115 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
116
117 int speculative_age_index = 0;
118 int speculative_steal_index = 0;
119 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122 __private_extern__ void vm_page_init_lck_grp(void);
123
124 static void vm_page_free_prepare(vm_page_t page);
125 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
130 /*
131 * Associated with each page of user-allocatable memory is a
132 * page structure.
133 */
134
135 /*
136 * These variables record the values returned by vm_page_bootstrap,
137 * for debugging purposes. The implementation of pmap_steal_memory
138 * and pmap_startup here also uses them internally.
139 */
140
141 vm_offset_t virtual_space_start;
142 vm_offset_t virtual_space_end;
143 uint32_t vm_page_pages;
144
145 /*
146 * The vm_page_lookup() routine, which provides for fast
147 * (virtual memory object, offset) to page lookup, employs
148 * the following hash table. The vm_page_{insert,remove}
149 * routines install and remove associations in the table.
150 * [This table is often called the virtual-to-physical,
151 * or VP, table.]
152 */
153 typedef struct {
154 vm_page_packed_t page_list;
155 #if MACH_PAGE_HASH_STATS
156 int cur_count; /* current count */
157 int hi_count; /* high water mark */
158 #endif /* MACH_PAGE_HASH_STATS */
159 } vm_page_bucket_t;
160
161
162 #define BUCKETS_PER_LOCK 16
163
164 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
165 unsigned int vm_page_bucket_count = 0; /* How big is array? */
166 unsigned int vm_page_hash_mask; /* Mask for hash function */
167 unsigned int vm_page_hash_shift; /* Shift for hash function */
168 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
169 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
170
171 lck_spin_t *vm_page_bucket_locks;
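/*
 * Each lck_spin_t above guards BUCKETS_PER_LOCK consecutive hash buckets
 * rather than a single bucket, which keeps the lock array small.  As an
 * illustration of the pattern used throughout this file, the lock for a
 * given bucket is located as:
 *
 *     bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */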
172
173 #if VM_PAGE_BUCKETS_CHECK
174 boolean_t vm_page_buckets_check_ready = FALSE;
175 #if VM_PAGE_FAKE_BUCKETS
176 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
177 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178 #endif /* VM_PAGE_FAKE_BUCKETS */
179 #endif /* VM_PAGE_BUCKETS_CHECK */
180
181 #if MACH_PAGE_HASH_STATS
182 /* This routine is only for debug. It is intended to be called by
183 * hand by a developer using a kernel debugger. This routine prints
184 * out vm_page_hash table statistics to the kernel debug console.
185 */
186 void
187 hash_debug(void)
188 {
189 int i;
190 int numbuckets = 0;
191 int highsum = 0;
192 int maxdepth = 0;
193
194 for (i = 0; i < vm_page_bucket_count; i++) {
195 if (vm_page_buckets[i].hi_count) {
196 numbuckets++;
197 highsum += vm_page_buckets[i].hi_count;
198 if (vm_page_buckets[i].hi_count > maxdepth)
199 maxdepth = vm_page_buckets[i].hi_count;
200 }
201 }
202 printf("Total number of buckets: %d\n", vm_page_bucket_count);
203 printf("Number used buckets: %d = %d%%\n",
204 numbuckets, 100*numbuckets/vm_page_bucket_count);
205 printf("Number unused buckets: %d = %d%%\n",
206 vm_page_bucket_count - numbuckets,
207 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208 printf("Sum of bucket max depth: %d\n", highsum);
209 printf("Average bucket depth: %d.%2d\n",
210 highsum/vm_page_bucket_count,
211 highsum%vm_page_bucket_count);
212 printf("Maximum bucket depth: %d\n", maxdepth);
213 }
214 #endif /* MACH_PAGE_HASH_STATS */
215
216 /*
217 * The virtual page size is currently implemented as a runtime
218 * variable, but is constant once initialized using vm_set_page_size.
219 * This initialization must be done in the machine-dependent
220 * bootstrap sequence, before calling other machine-independent
221 * initializations.
222 *
223 * All references to the virtual page size outside this
224 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 * constants.
226 */
227 vm_size_t page_size = PAGE_SIZE;
228 vm_size_t page_mask = PAGE_MASK;
229 int page_shift = PAGE_SHIFT;
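/*
 * For example, with 4 KB pages these evaluate to:
 *
 *     page_size  = 4096
 *     page_mask  = 0xFFF    (page_size - 1)
 *     page_shift = 12       (log2 of page_size)
 *
 * vm_set_page_size() below re-derives page_shift and sanity-checks that
 * page_size is a power of two.
 */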
230
231 /*
232 * Resident page structures are initialized from
233 * a template (see vm_page_alloc).
234 *
235 * When adding a new field to the virtual memory
236 * object structure, be sure to add initialization
237 * (see vm_page_bootstrap).
238 */
239 struct vm_page vm_page_template;
240
241 vm_page_t vm_pages = VM_PAGE_NULL;
242 unsigned int vm_pages_count = 0;
243 ppnum_t vm_page_lowest = 0;
244
245 /*
246 * Resident pages that represent real memory
247 * are allocated from a set of free lists,
248 * one per color.
249 */
250 unsigned int vm_colors;
251 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
252 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
253 unsigned int vm_free_magazine_refill_limit = 0;
254 queue_head_t vm_page_queue_free[MAX_COLORS];
255 unsigned int vm_page_free_wanted;
256 unsigned int vm_page_free_wanted_privileged;
257 unsigned int vm_page_free_count;
258 unsigned int vm_page_fictitious_count;
259
260 /*
261 * Occasionally, the virtual memory system uses
262 * resident page structures that do not refer to
263 * real pages, for example to leave a page with
264 * important state information in the VP table.
265 *
266 * These page structures are allocated the way
267 * most other kernel structures are.
268 */
269 zone_t vm_page_zone;
270 vm_locks_array_t vm_page_locks;
271 decl_lck_mtx_data(,vm_page_alloc_lock)
272 lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274 unsigned int io_throttle_zero_fill;
275
276 unsigned int vm_page_local_q_count = 0;
277 unsigned int vm_page_local_q_soft_limit = 250;
278 unsigned int vm_page_local_q_hard_limit = 500;
279 struct vplq *vm_page_local_q = NULL;
280
281 /* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284 /*
285 * Fictitious pages don't have a physical address,
286 * but we must initialize phys_page to something.
287 * For debugging, this should be a strange value
288 * that the pmap module can recognize in assertions.
289 */
290 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292 /*
293 * Guard pages are not accessible so they don't
294 * need a physical address, but we need to enter
295 * one in the pmap.
296 * Let's make it recognizable and make sure that
297 * we don't use a real physical page with that
298 * physical address.
299 */
300 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302 /*
303 * Resident page structures are also chained on
304 * queues that are used by the page replacement
305 * system (pageout daemon). These queues are
306 * defined here, but are shared by the pageout
307 * module. The inactive queue is broken into
308 * file-backed and anonymous queues for convenience, as the
309 * pageout daemon often assigns a higher
310 * importance to anonymous pages (it is less likely to pick them).
311 */
312 queue_head_t vm_page_queue_active;
313 queue_head_t vm_page_queue_inactive;
314 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
315 queue_head_t vm_page_queue_throttled;
316
317 unsigned int vm_page_active_count;
318 unsigned int vm_page_inactive_count;
319 unsigned int vm_page_anonymous_count;
320 unsigned int vm_page_throttled_count;
321 unsigned int vm_page_speculative_count;
322 unsigned int vm_page_wire_count;
323 unsigned int vm_page_wire_count_initial;
324 unsigned int vm_page_gobble_count = 0;
325
326 #define VM_PAGE_WIRE_COUNT_WARNING 0
327 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
328
329 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
330 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
332
333 unsigned int vm_page_xpmapped_external_count = 0;
334 unsigned int vm_page_external_count = 0;
335 unsigned int vm_page_internal_count = 0;
336 unsigned int vm_page_pageable_external_count = 0;
337 unsigned int vm_page_pageable_internal_count = 0;
338
339 #if DEVELOPMENT || DEBUG
340 unsigned int vm_page_speculative_recreated = 0;
341 unsigned int vm_page_speculative_created = 0;
342 unsigned int vm_page_speculative_used = 0;
343 #endif
344
345 queue_head_t vm_page_queue_cleaned;
346
347 unsigned int vm_page_cleaned_count = 0;
348 unsigned int vm_pageout_enqueued_cleaned = 0;
349
350 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
351 ppnum_t max_valid_low_ppnum = 0xffffffff;
352
353
354 /*
355 * Several page replacement parameters are also
356 * shared with this module, so that page allocation
357 * (done here in vm_page_alloc) can trigger the
358 * pageout daemon.
359 */
360 unsigned int vm_page_free_target = 0;
361 unsigned int vm_page_free_min = 0;
362 unsigned int vm_page_throttle_limit = 0;
363 unsigned int vm_page_inactive_target = 0;
364 unsigned int vm_page_anonymous_min = 0;
365 unsigned int vm_page_inactive_min = 0;
366 unsigned int vm_page_free_reserved = 0;
367 unsigned int vm_page_throttle_count = 0;
368
369
370 /*
371 * The VM system has a couple of heuristics for deciding
372 * that pages are "uninteresting" and should be placed
373 * on the inactive queue as likely candidates for replacement.
374 * These variables let the heuristics be controlled at run-time
375 * to make experimentation easier.
376 */
377
378 boolean_t vm_page_deactivate_hint = TRUE;
379
380 struct vm_page_stats_reusable vm_page_stats_reusable;
381
382 /*
383 * vm_set_page_size:
384 *
385 * Sets the page size, perhaps based upon the memory
386 * size. Must be called before any use of page-size
387 * dependent functions.
388 *
389 * Sets page_shift and page_mask from page_size.
390 */
391 void
392 vm_set_page_size(void)
393 {
394 page_size = PAGE_SIZE;
395 page_mask = PAGE_MASK;
396 page_shift = PAGE_SHIFT;
397
398 if ((page_mask & page_size) != 0)
399 panic("vm_set_page_size: page size not a power of two");
400
401 for (page_shift = 0; ; page_shift++)
402 if ((1U << page_shift) == page_size)
403 break;
404 }
405
406 #define COLOR_GROUPS_TO_STEAL 4
407
408
409 /* Called once during startup, once the cache geometry is known.
410 */
411 static void
412 vm_page_set_colors( void )
413 {
414 unsigned int n, override;
415
416 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
417 n = override;
418 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
419 n = vm_cache_geometry_colors;
420 else n = DEFAULT_COLORS; /* use default if all else fails */
421
422 if ( n == 0 )
423 n = 1;
424 if ( n > MAX_COLORS )
425 n = MAX_COLORS;
426
427 /* the count must be a power of 2 */
428 if ( ( n & (n - 1)) != 0 )
429 panic("vm_page_set_colors");
430
431 vm_colors = n;
432 vm_color_mask = n - 1;
433
434 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
435 }
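/*
 * Illustrative example: on hardware that reports an 8-color cache
 * geometry, vm_colors = 8, vm_color_mask = 7 and, with
 * COLOR_GROUPS_TO_STEAL = 4, vm_free_magazine_refill_limit = 32.
 * Free pages are kept on vm_page_queue_free[color], where the color is
 * conventionally taken from the low bits of the physical page number
 * (phys_page & vm_color_mask), so that physically consecutive pages
 * spread across the per-color free queues.
 */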
436
437
438 lck_grp_t vm_page_lck_grp_free;
439 lck_grp_t vm_page_lck_grp_queue;
440 lck_grp_t vm_page_lck_grp_local;
441 lck_grp_t vm_page_lck_grp_purge;
442 lck_grp_t vm_page_lck_grp_alloc;
443 lck_grp_t vm_page_lck_grp_bucket;
444 lck_grp_attr_t vm_page_lck_grp_attr;
445 lck_attr_t vm_page_lck_attr;
446
447
448 __private_extern__ void
449 vm_page_init_lck_grp(void)
450 {
451 /*
452 * initialize the vm_page lock world
453 */
454 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
455 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
456 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
457 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
458 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
459 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
460 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
461 lck_attr_setdefault(&vm_page_lck_attr);
462 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
463
464 vm_compressor_init_locks();
465 }
466
467 void
468 vm_page_init_local_q()
469 {
470 unsigned int num_cpus;
471 unsigned int i;
472 struct vplq *t_local_q;
473
474 num_cpus = ml_get_max_cpus();
475
476 /*
477 * no point in this for a uni-processor system
478 */
479 if (num_cpus >= 2) {
480 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
481
482 for (i = 0; i < num_cpus; i++) {
483 struct vpl *lq;
484
485 lq = &t_local_q[i].vpl_un.vpl;
486 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
487 queue_init(&lq->vpl_queue);
488 lq->vpl_count = 0;
489 lq->vpl_internal_count = 0;
490 lq->vpl_external_count = 0;
491 }
492 vm_page_local_q_count = num_cpus;
493
494 vm_page_local_q = (struct vplq *)t_local_q;
495 }
496 }
497
498
499 /*
500 * vm_page_bootstrap:
501 *
502 * Initializes the resident memory module.
503 *
504 * Allocates memory for the page cells, and
505 * for the object/offset-to-page hash table headers.
506 * Each page cell is initialized and placed on the free list.
507 * Returns the range of available kernel virtual memory.
508 */
509
510 void
511 vm_page_bootstrap(
512 vm_offset_t *startp,
513 vm_offset_t *endp)
514 {
515 register vm_page_t m;
516 unsigned int i;
517 unsigned int log1;
518 unsigned int log2;
519 unsigned int size;
520
521 /*
522 * Initialize the vm_page template.
523 */
524
525 m = &vm_page_template;
526 bzero(m, sizeof (*m));
527
528 m->pageq.next = NULL;
529 m->pageq.prev = NULL;
530 m->listq.next = NULL;
531 m->listq.prev = NULL;
532 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
533
534 m->object = VM_OBJECT_NULL; /* reset later */
535 m->offset = (vm_object_offset_t) -1; /* reset later */
536
537 m->wire_count = 0;
538 m->local = FALSE;
539 m->inactive = FALSE;
540 m->active = FALSE;
541 m->pageout_queue = FALSE;
542 m->speculative = FALSE;
543 m->laundry = FALSE;
544 m->free = FALSE;
545 m->reference = FALSE;
546 m->gobbled = FALSE;
547 m->private = FALSE;
548 m->throttled = FALSE;
549 m->__unused_pageq_bits = 0;
550
551 m->phys_page = 0; /* reset later */
552
553 m->busy = TRUE;
554 m->wanted = FALSE;
555 m->tabled = FALSE;
556 m->hashed = FALSE;
557 m->fictitious = FALSE;
558 m->pmapped = FALSE;
559 m->wpmapped = FALSE;
560 m->pageout = FALSE;
561 m->absent = FALSE;
562 m->error = FALSE;
563 m->dirty = FALSE;
564 m->cleaning = FALSE;
565 m->precious = FALSE;
566 m->clustered = FALSE;
567 m->overwriting = FALSE;
568 m->restart = FALSE;
569 m->unusual = FALSE;
570 m->encrypted = FALSE;
571 m->encrypted_cleaning = FALSE;
572 m->cs_validated = FALSE;
573 m->cs_tainted = FALSE;
574 m->no_cache = FALSE;
575 m->reusable = FALSE;
576 m->slid = FALSE;
577 m->xpmapped = FALSE;
578 m->compressor = FALSE;
579 m->written_by_kernel = FALSE;
580 m->__unused_object_bits = 0;
581
582 /*
583 * Initialize the page queues.
584 */
585 vm_page_init_lck_grp();
586
587 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
588 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
589 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
590
591 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
592 int group;
593
594 purgeable_queues[i].token_q_head = 0;
595 purgeable_queues[i].token_q_tail = 0;
596 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
597 queue_init(&purgeable_queues[i].objq[group]);
598
599 purgeable_queues[i].type = i;
600 purgeable_queues[i].new_pages = 0;
601 #if MACH_ASSERT
602 purgeable_queues[i].debug_count_tokens = 0;
603 purgeable_queues[i].debug_count_objects = 0;
604 #endif
605 };
606 purgeable_nonvolatile_count = 0;
607 queue_init(&purgeable_nonvolatile_queue);
608
609 for (i = 0; i < MAX_COLORS; i++ )
610 queue_init(&vm_page_queue_free[i]);
611
612 queue_init(&vm_lopage_queue_free);
613 queue_init(&vm_page_queue_active);
614 queue_init(&vm_page_queue_inactive);
615 queue_init(&vm_page_queue_cleaned);
616 queue_init(&vm_page_queue_throttled);
617 queue_init(&vm_page_queue_anonymous);
618
619 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
620 queue_init(&vm_page_queue_speculative[i].age_q);
621
622 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
623 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
624 }
625 vm_page_free_wanted = 0;
626 vm_page_free_wanted_privileged = 0;
627
628 vm_page_set_colors();
629
630
631 /*
632 * Steal memory for the map and zone subsystems.
633 */
634 kernel_debug_string("zone_steal_memory");
635 zone_steal_memory();
636 kernel_debug_string("vm_map_steal_memory");
637 vm_map_steal_memory();
638
639 /*
640 * Allocate (and initialize) the virtual-to-physical
641 * table hash buckets.
642 *
643 * The number of buckets should be a power of two to
644 * get a good hash function. The following computation
645 * chooses the first power of two that is greater
646 * than or equal to the number of physical pages in the system.
647 */
648
649 if (vm_page_bucket_count == 0) {
650 unsigned int npages = pmap_free_pages();
651
652 vm_page_bucket_count = 1;
653 while (vm_page_bucket_count < npages)
654 vm_page_bucket_count <<= 1;
655 }
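/*
 * For example, a machine with 4 GB of 4 KB pages has roughly 2^20
 * (1,048,576) physical pages, so the loop above settles on
 * vm_page_bucket_count = 2^20, the first power of two that is not
 * smaller than pmap_free_pages().
 */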
656 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
657
658 vm_page_hash_mask = vm_page_bucket_count - 1;
659
660 /*
661 * Calculate object shift value for hashing algorithm:
662 * O = log2(sizeof(struct vm_object))
663 * B = log2(vm_page_bucket_count)
664 * hash shifts the object left by
665 * B/2 - O
666 */
667 size = vm_page_bucket_count;
668 for (log1 = 0; size > 1; log1++)
669 size /= 2;
670 size = sizeof(struct vm_object);
671 for (log2 = 0; size > 1; log2++)
672 size /= 2;
673 vm_page_hash_shift = log1/2 - log2 + 1;
674
675 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
676 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
677 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
678
679 if (vm_page_hash_mask & vm_page_bucket_count)
680 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
681
682 #if VM_PAGE_BUCKETS_CHECK
683 #if VM_PAGE_FAKE_BUCKETS
684 /*
685 * Allocate a decoy set of page buckets, to detect
686 * any stomping there.
687 */
688 vm_page_fake_buckets = (vm_page_bucket_t *)
689 pmap_steal_memory(vm_page_bucket_count *
690 sizeof(vm_page_bucket_t));
691 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
692 vm_page_fake_buckets_end =
693 vm_map_round_page((vm_page_fake_buckets_start +
694 (vm_page_bucket_count *
695 sizeof (vm_page_bucket_t))),
696 PAGE_MASK);
697 char *cp;
698 for (cp = (char *)vm_page_fake_buckets_start;
699 cp < (char *)vm_page_fake_buckets_end;
700 cp++) {
701 *cp = 0x5a;
702 }
703 #endif /* VM_PAGE_FAKE_BUCKETS */
704 #endif /* VM_PAGE_BUCKETS_CHECK */
705
706 kernel_debug_string("vm_page_buckets");
707 vm_page_buckets = (vm_page_bucket_t *)
708 pmap_steal_memory(vm_page_bucket_count *
709 sizeof(vm_page_bucket_t));
710
711 kernel_debug_string("vm_page_bucket_locks");
712 vm_page_bucket_locks = (lck_spin_t *)
713 pmap_steal_memory(vm_page_bucket_lock_count *
714 sizeof(lck_spin_t));
715
716 for (i = 0; i < vm_page_bucket_count; i++) {
717 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
718
719 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
720 #if MACH_PAGE_HASH_STATS
721 bucket->cur_count = 0;
722 bucket->hi_count = 0;
723 #endif /* MACH_PAGE_HASH_STATS */
724 }
725
726 for (i = 0; i < vm_page_bucket_lock_count; i++)
727 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
728
729 #if VM_PAGE_BUCKETS_CHECK
730 vm_page_buckets_check_ready = TRUE;
731 #endif /* VM_PAGE_BUCKETS_CHECK */
732
733 /*
734 * Machine-dependent code allocates the resident page table.
735 * It uses vm_page_init to initialize the page frames.
736 * The code also returns to us the virtual space available
737 * to the kernel. We don't trust the pmap module
738 * to get the alignment right.
739 */
740
741 kernel_debug_string("pmap_startup");
742 pmap_startup(&virtual_space_start, &virtual_space_end);
743 virtual_space_start = round_page(virtual_space_start);
744 virtual_space_end = trunc_page(virtual_space_end);
745
746 *startp = virtual_space_start;
747 *endp = virtual_space_end;
748
749 /*
750 * Compute the initial "wire" count.
751 * Up until now, the pages which have been set aside are not under
752 * the VM system's control, so although they aren't explicitly
753 * wired, they nonetheless can't be moved. At this moment,
754 * all VM managed pages are "free", courtesy of pmap_startup.
755 */
756 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
757 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
758 vm_page_wire_count_initial = vm_page_wire_count;
759
760 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
761 vm_page_free_count, vm_page_wire_count);
762
763 kernel_debug_string("vm_page_bootstrap complete");
764 simple_lock_init(&vm_paging_lock, 0);
765 }
766
767 #ifndef MACHINE_PAGES
768 /*
769 * We implement pmap_steal_memory and pmap_startup with the help
770 * of two simpler functions, pmap_virtual_space and pmap_next_page.
771 */
772
773 void *
774 pmap_steal_memory(
775 vm_size_t size)
776 {
777 vm_offset_t addr, vaddr;
778 ppnum_t phys_page;
779
780 /*
781 * Round the requested size up to a multiple of the pointer size.
782 */
783
784 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
785
786 /*
787 * If this is the first call to pmap_steal_memory,
788 * we have to initialize ourself.
789 */
790
791 if (virtual_space_start == virtual_space_end) {
792 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
793
794 /*
795 * The initial values must be aligned properly, and
796 * we don't trust the pmap module to do it right.
797 */
798
799 virtual_space_start = round_page(virtual_space_start);
800 virtual_space_end = trunc_page(virtual_space_end);
801 }
802
803 /*
804 * Allocate virtual memory for this request.
805 */
806
807 addr = virtual_space_start;
808 virtual_space_start += size;
809
810 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
811
812 /*
813 * Allocate and map physical pages to back new virtual pages.
814 */
815
816 for (vaddr = round_page(addr);
817 vaddr < addr + size;
818 vaddr += PAGE_SIZE) {
819
820 if (!pmap_next_page_hi(&phys_page))
821 panic("pmap_steal_memory");
822
823 /*
824 * XXX Logically, these mappings should be wired,
825 * but some pmap modules barf if they are.
826 */
827 #if defined(__LP64__)
828 pmap_pre_expand(kernel_pmap, vaddr);
829 #endif
830
831 pmap_enter(kernel_pmap, vaddr, phys_page,
832 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
833 VM_WIMG_USE_DEFAULT, FALSE);
834 /*
835 * Account for newly stolen memory
836 */
837 vm_page_wire_count++;
838
839 }
840
841 return (void *) addr;
842 }
843
844 void vm_page_release_startup(vm_page_t mem);
845 void
846 pmap_startup(
847 vm_offset_t *startp,
848 vm_offset_t *endp)
849 {
850 unsigned int i, npages, pages_initialized, fill, fillval;
851 ppnum_t phys_page;
852 addr64_t tmpaddr;
853
854
855 #if defined(__LP64__)
856 /*
857 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
858 */
859 assert(sizeof(struct vm_page) == 64);
860
861 /*
862 * make sure we are aligned on a 64 byte boundary
863 * for VM_PAGE_PACK_PTR (it clips off the low-order
864 * 6 bits of the pointer)
865 */
866 if (virtual_space_start != virtual_space_end)
867 virtual_space_start = round_page(virtual_space_start);
868 #endif
869
870 /*
871 * We calculate how many page frames we will have
872 * and then allocate the page structures in one chunk.
873 */
874
875 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
876 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
877 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
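/*
 * Dividing by (PAGE_SIZE + sizeof(*vm_pages)) sizes the array so that
 * the vm_page structures stolen below and the pages they will describe
 * both fit in the memory that remains.  With 64-byte structures and
 * 4 KB pages, the array consumes about 1.5% of the remaining memory.
 */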
878
879 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
880
881 /*
882 * Initialize the page frames.
883 */
884 kernel_debug_string("Initialize the page frames");
885 for (i = 0, pages_initialized = 0; i < npages; i++) {
886 if (!pmap_next_page(&phys_page))
887 break;
888 if (pages_initialized == 0 || phys_page < vm_page_lowest)
889 vm_page_lowest = phys_page;
890
891 vm_page_init(&vm_pages[i], phys_page, FALSE);
892 vm_page_pages++;
893 pages_initialized++;
894 }
895 vm_pages_count = pages_initialized;
896
897 #if defined(__LP64__)
898
899 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
900 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
901
902 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
903 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
904 #endif
905 kernel_debug_string("page fill/release");
906 /*
907 * Check if we want to initialize pages to a known value
908 */
909 fill = 0; /* Assume no fill */
910 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
911 #if DEBUG
912 /* This slows down booting the DEBUG kernel, particularly on
913 * large memory systems, but is worthwhile in deterministically
914 * trapping uninitialized memory usage.
915 */
916 if (fill == 0) {
917 fill = 1;
918 fillval = 0xDEB8F177;
919 }
920 #endif
921 if (fill)
922 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
923 // -debug code remove
924 if (2 == vm_himemory_mode) {
925 // free low -> high so high is preferred
926 for (i = 1; i <= pages_initialized; i++) {
927 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
928 vm_page_release_startup(&vm_pages[i - 1]);
929 }
930 }
931 else
932 // debug code remove-
933
934 /*
935 * Release pages in reverse order so that physical pages
936 * initially get allocated in ascending address order. This keeps
937 * the devices (which must address physical memory) happy if
938 * they require several consecutive pages.
939 */
940 for (i = pages_initialized; i > 0; i--) {
941 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
942 vm_page_release_startup(&vm_pages[i - 1]);
943 }
944
945 VM_CHECK_MEMORYSTATUS;
946
947 #if 0
948 {
949 vm_page_t xx, xxo, xxl;
950 int i, j, k, l;
951
952 j = 0; /* (BRINGUP) */
953 xxl = 0;
954
955 for( i = 0; i < vm_colors; i++ ) {
956 queue_iterate(&vm_page_queue_free[i],
957 xx,
958 vm_page_t,
959 pageq) { /* BRINGUP */
960 j++; /* (BRINGUP) */
961 if(j > vm_page_free_count) { /* (BRINGUP) */
962 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
963 }
964
965 l = vm_page_free_count - j; /* (BRINGUP) */
966 k = 0; /* (BRINGUP) */
967
968 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
969
970 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
971 k++;
972 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
973 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
974 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
975 }
976 }
977
978 xxl = xx;
979 }
980 }
981
982 if(j != vm_page_free_count) { /* (BRINGUP) */
983 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
984 }
985 }
986 #endif
987
988
989 /*
990 * We have to re-align virtual_space_start,
991 * because pmap_steal_memory has been using it.
992 */
993
994 virtual_space_start = round_page(virtual_space_start);
995
996 *startp = virtual_space_start;
997 *endp = virtual_space_end;
998 }
999 #endif /* MACHINE_PAGES */
1000
1001 /*
1002 * Routine: vm_page_module_init
1003 * Purpose:
1004 * Second initialization pass, to be done after
1005 * the basic VM system is ready.
1006 */
1007 void
1008 vm_page_module_init(void)
1009 {
1010 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1011 0, PAGE_SIZE, "vm pages");
1012
1013 #if ZONE_DEBUG
1014 zone_debug_disable(vm_page_zone);
1015 #endif /* ZONE_DEBUG */
1016
1017 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1018 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1019 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1020 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1021 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1022 /*
1023 * Adjust zone statistics to account for the real pages allocated
1024 * in vm_page_create(). [Q: is this really what we want?]
1025 */
1026 vm_page_zone->count += vm_page_pages;
1027 vm_page_zone->sum_count += vm_page_pages;
1028 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1029 }
1030
1031 /*
1032 * Routine: vm_page_create
1033 * Purpose:
1034 * After the VM system is up, machine-dependent code
1035 * may stumble across more physical memory. For example,
1036 * memory that it was reserving for a frame buffer.
1037 * vm_page_create turns this memory into available pages.
1038 */
1039
1040 void
1041 vm_page_create(
1042 ppnum_t start,
1043 ppnum_t end)
1044 {
1045 ppnum_t phys_page;
1046 vm_page_t m;
1047
1048 for (phys_page = start;
1049 phys_page < end;
1050 phys_page++) {
1051 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1052 == VM_PAGE_NULL)
1053 vm_page_more_fictitious();
1054
1055 m->fictitious = FALSE;
1056 pmap_clear_noencrypt(phys_page);
1057
1058 vm_page_pages++;
1059 vm_page_release(m);
1060 }
1061 }
1062
1063 /*
1064 * vm_page_hash:
1065 *
1066 * Distributes the object/offset key pair among hash buckets.
1067 *
1068 * NOTE: The bucket count must be a power of 2
1069 */
1070 #define vm_page_hash(object, offset) (\
1071 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1072 & vm_page_hash_mask)
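/*
 * A sketch of how the hash is consumed elsewhere in this file (see
 * vm_page_insert_internal(), vm_page_remove() and vm_page_lookup()):
 *
 *     hash_id     = vm_page_hash(object, offset);
 *     bucket      = &vm_page_buckets[hash_id];
 *     bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * Because vm_page_bucket_count is a power of 2, masking with
 * vm_page_hash_mask is equivalent to reducing the mixed value modulo
 * the bucket count.
 */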
1073
1074
1075 /*
1076 * vm_page_insert: [ internal use only ]
1077 *
1078 * Inserts the given mem entry into the object/object-page
1079 * table and object list.
1080 *
1081 * The object must be locked.
1082 */
1083 void
1084 vm_page_insert(
1085 vm_page_t mem,
1086 vm_object_t object,
1087 vm_object_offset_t offset)
1088 {
1089 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1090 }
1091
1092 void
1093 vm_page_insert_internal(
1094 vm_page_t mem,
1095 vm_object_t object,
1096 vm_object_offset_t offset,
1097 boolean_t queues_lock_held,
1098 boolean_t insert_in_hash,
1099 boolean_t batch_pmap_op)
1100 {
1101 vm_page_bucket_t *bucket;
1102 lck_spin_t *bucket_lock;
1103 int hash_id;
1104 task_t owner;
1105
1106 XPR(XPR_VM_PAGE,
1107 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1108 object, offset, mem, 0,0);
1109 #if 0
1110 /*
1111 * we may not hold the page queue lock
1112 * so this check isn't safe to make
1113 */
1114 VM_PAGE_CHECK(mem);
1115 #endif
1116
1117 assert(page_aligned(offset));
1118
1119 /* the vm_submap_object is only a placeholder for submaps */
1120 assert(object != vm_submap_object);
1121
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 lck_mtx_assert(&vm_page_queue_lock,
1125 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1126 : LCK_MTX_ASSERT_NOTOWNED);
1127 #endif /* DEBUG */
1128
1129 if (insert_in_hash == TRUE) {
1130 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1131 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1132 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1133 "already in (obj=%p,off=0x%llx)",
1134 mem, object, offset, mem->object, mem->offset);
1135 #endif
1136 assert(!object->internal || offset < object->vo_size);
1137
1138 /* only insert "pageout" pages into "pageout" objects,
1139 * and normal pages into normal objects */
1140 assert(object->pageout == mem->pageout);
1141
1142 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1143
1144 /*
1145 * Record the object/offset pair in this page
1146 */
1147
1148 mem->object = object;
1149 mem->offset = offset;
1150
1151 /*
1152 * Insert it into the object_object/offset hash table
1153 */
1154 hash_id = vm_page_hash(object, offset);
1155 bucket = &vm_page_buckets[hash_id];
1156 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1157
1158 lck_spin_lock(bucket_lock);
1159
1160 mem->next_m = bucket->page_list;
1161 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1162 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1163
1164 #if MACH_PAGE_HASH_STATS
1165 if (++bucket->cur_count > bucket->hi_count)
1166 bucket->hi_count = bucket->cur_count;
1167 #endif /* MACH_PAGE_HASH_STATS */
1168 mem->hashed = TRUE;
1169 lck_spin_unlock(bucket_lock);
1170 }
1171
1172 {
1173 unsigned int cache_attr;
1174
1175 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1176
1177 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1178 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1179 }
1180 }
1181 /*
1182 * Now link into the object's list of backed pages.
1183 */
1184 VM_PAGE_INSERT(mem, object);
1185 mem->tabled = TRUE;
1186
1187 /*
1188 * Show that the object has one more resident page.
1189 */
1190
1191 object->resident_page_count++;
1192 if (VM_PAGE_WIRED(mem)) {
1193 object->wired_page_count++;
1194 }
1195 assert(object->resident_page_count >= object->wired_page_count);
1196
1197 if (object->internal) {
1198 OSAddAtomic(1, &vm_page_internal_count);
1199 } else {
1200 OSAddAtomic(1, &vm_page_external_count);
1201 }
1202
1203 /*
1204 * It wouldn't make sense to insert a "reusable" page in
1205 * an object (the page would have been marked "reusable" only
1206 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1207 * in the object at that time).
1208 * But a page could be inserted in an "all_reusable" object, if
1209 * something faults it in (a vm_read() from another task or a
1210 * "use-after-free" issue in user space, for example). It can
1211 * also happen if we're relocating a page from that object to
1212 * a different physical page during a physically-contiguous
1213 * allocation.
1214 */
1215 assert(!mem->reusable);
1216 if (mem->object->all_reusable) {
1217 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1218 }
1219
1220 if (object->purgable == VM_PURGABLE_DENY) {
1221 owner = TASK_NULL;
1222 } else {
1223 owner = object->vo_purgeable_owner;
1224 }
1225 if (owner &&
1226 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1227 VM_PAGE_WIRED(mem))) {
1228 /* more non-volatile bytes */
1229 ledger_credit(owner->ledger,
1230 task_ledgers.purgeable_nonvolatile,
1231 PAGE_SIZE);
1232 /* more footprint */
1233 ledger_credit(owner->ledger,
1234 task_ledgers.phys_footprint,
1235 PAGE_SIZE);
1236
1237 } else if (owner &&
1238 (object->purgable == VM_PURGABLE_VOLATILE ||
1239 object->purgable == VM_PURGABLE_EMPTY)) {
1240 assert(! VM_PAGE_WIRED(mem));
1241 /* more volatile bytes */
1242 ledger_credit(owner->ledger,
1243 task_ledgers.purgeable_volatile,
1244 PAGE_SIZE);
1245 }
1246
1247 if (object->purgable == VM_PURGABLE_VOLATILE) {
1248 if (VM_PAGE_WIRED(mem)) {
1249 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1250 } else {
1251 OSAddAtomic(+1, &vm_page_purgeable_count);
1252 }
1253 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1254 mem->throttled) {
1255 /*
1256 * This page belongs to a purged VM object but hasn't
1257 * been purged (because it was "busy").
1258 * It's in the "throttled" queue and hence not
1259 * visible to vm_pageout_scan(). Move it to a pageable
1260 * queue, so that it can eventually be reclaimed, instead
1261 * of lingering in the "empty" object.
1262 */
1263 if (queues_lock_held == FALSE)
1264 vm_page_lockspin_queues();
1265 vm_page_deactivate(mem);
1266 if (queues_lock_held == FALSE)
1267 vm_page_unlock_queues();
1268 }
1269
1270 #if VM_OBJECT_TRACKING_OP_MODIFIED
1271 if (vm_object_tracking_inited &&
1272 object->internal &&
1273 object->resident_page_count == 0 &&
1274 object->pager == NULL &&
1275 object->shadow != NULL &&
1276 object->shadow->copy == object) {
1277 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1278 int numsaved = 0;
1279
1280 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1281 btlog_add_entry(vm_object_tracking_btlog,
1282 object,
1283 VM_OBJECT_TRACKING_OP_MODIFIED,
1284 bt,
1285 numsaved);
1286 }
1287 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1288 }
1289
1290 /*
1291 * vm_page_replace:
1292 *
1293 * Exactly like vm_page_insert, except that we first
1294 * remove any existing page at the given offset in object.
1295 *
1296 * The object must be locked.
1297 */
1298 void
1299 vm_page_replace(
1300 register vm_page_t mem,
1301 register vm_object_t object,
1302 register vm_object_offset_t offset)
1303 {
1304 vm_page_bucket_t *bucket;
1305 vm_page_t found_m = VM_PAGE_NULL;
1306 lck_spin_t *bucket_lock;
1307 int hash_id;
1308
1309 #if 0
1310 /*
1311 * we don't hold the page queue lock
1312 * so this check isn't safe to make
1313 */
1314 VM_PAGE_CHECK(mem);
1315 #endif
1316 vm_object_lock_assert_exclusive(object);
1317 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1318 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1319 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1320 "already in (obj=%p,off=0x%llx)",
1321 mem, object, offset, mem->object, mem->offset);
1322 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1323 #endif
1324 /*
1325 * Record the object/offset pair in this page
1326 */
1327
1328 mem->object = object;
1329 mem->offset = offset;
1330
1331 /*
1332 * Insert it into the object_object/offset hash table,
1333 * replacing any page that might have been there.
1334 */
1335
1336 hash_id = vm_page_hash(object, offset);
1337 bucket = &vm_page_buckets[hash_id];
1338 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1339
1340 lck_spin_lock(bucket_lock);
1341
1342 if (bucket->page_list) {
1343 vm_page_packed_t *mp = &bucket->page_list;
1344 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1345
1346 do {
1347 if (m->object == object && m->offset == offset) {
1348 /*
1349 * Remove old page from hash list
1350 */
1351 *mp = m->next_m;
1352 m->hashed = FALSE;
1353
1354 found_m = m;
1355 break;
1356 }
1357 mp = &m->next_m;
1358 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1359
1360 mem->next_m = bucket->page_list;
1361 } else {
1362 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1363 }
1364 /*
1365 * insert new page at head of hash list
1366 */
1367 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1368 mem->hashed = TRUE;
1369
1370 lck_spin_unlock(bucket_lock);
1371
1372 if (found_m) {
1373 /*
1374 * there was already a page at the specified
1375 * offset for this object... remove it from
1376 * the object and free it back to the free list
1377 */
1378 vm_page_free_unlocked(found_m, FALSE);
1379 }
1380 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1381 }
1382
1383 /*
1384 * vm_page_remove: [ internal use only ]
1385 *
1386 * Removes the given mem entry from the object/offset-page
1387 * table and the object page list.
1388 *
1389 * The object must be locked.
1390 */
1391
1392 void
1393 vm_page_remove(
1394 vm_page_t mem,
1395 boolean_t remove_from_hash)
1396 {
1397 vm_page_bucket_t *bucket;
1398 vm_page_t this;
1399 lck_spin_t *bucket_lock;
1400 int hash_id;
1401 task_t owner;
1402
1403 XPR(XPR_VM_PAGE,
1404 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1405 mem->object, mem->offset,
1406 mem, 0,0);
1407
1408 vm_object_lock_assert_exclusive(mem->object);
1409 assert(mem->tabled);
1410 assert(!mem->cleaning);
1411 assert(!mem->laundry);
1412 #if 0
1413 /*
1414 * we don't hold the page queue lock
1415 * so this check isn't safe to make
1416 */
1417 VM_PAGE_CHECK(mem);
1418 #endif
1419 if (remove_from_hash == TRUE) {
1420 /*
1421 * Remove from the object_object/offset hash table
1422 */
1423 hash_id = vm_page_hash(mem->object, mem->offset);
1424 bucket = &vm_page_buckets[hash_id];
1425 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1426
1427 lck_spin_lock(bucket_lock);
1428
1429 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1430 /* optimize for common case */
1431
1432 bucket->page_list = mem->next_m;
1433 } else {
1434 vm_page_packed_t *prev;
1435
1436 for (prev = &this->next_m;
1437 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1438 prev = &this->next_m)
1439 continue;
1440 *prev = this->next_m;
1441 }
1442 #if MACH_PAGE_HASH_STATS
1443 bucket->cur_count--;
1444 #endif /* MACH_PAGE_HASH_STATS */
1445 mem->hashed = FALSE;
1446 lck_spin_unlock(bucket_lock);
1447 }
1448 /*
1449 * Now remove from the object's list of backed pages.
1450 */
1451
1452 VM_PAGE_REMOVE(mem);
1453
1454 /*
1455 * And show that the object has one fewer resident
1456 * page.
1457 */
1458
1459 assert(mem->object->resident_page_count > 0);
1460 mem->object->resident_page_count--;
1461
1462 if (mem->object->internal) {
1463 #if DEBUG
1464 assert(vm_page_internal_count);
1465 #endif /* DEBUG */
1466
1467 OSAddAtomic(-1, &vm_page_internal_count);
1468 } else {
1469 assert(vm_page_external_count);
1470 OSAddAtomic(-1, &vm_page_external_count);
1471
1472 if (mem->xpmapped) {
1473 assert(vm_page_xpmapped_external_count);
1474 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1475 }
1476 }
1477 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1478 if (mem->object->resident_page_count == 0)
1479 vm_object_cache_remove(mem->object);
1480 }
1481
1482 if (VM_PAGE_WIRED(mem)) {
1483 assert(mem->object->wired_page_count > 0);
1484 mem->object->wired_page_count--;
1485 }
1486 assert(mem->object->resident_page_count >=
1487 mem->object->wired_page_count);
1488 if (mem->reusable) {
1489 assert(mem->object->reusable_page_count > 0);
1490 mem->object->reusable_page_count--;
1491 assert(mem->object->reusable_page_count <=
1492 mem->object->resident_page_count);
1493 mem->reusable = FALSE;
1494 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1495 vm_page_stats_reusable.reused_remove++;
1496 } else if (mem->object->all_reusable) {
1497 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1498 vm_page_stats_reusable.reused_remove++;
1499 }
1500
1501 if (mem->object->purgable == VM_PURGABLE_DENY) {
1502 owner = TASK_NULL;
1503 } else {
1504 owner = mem->object->vo_purgeable_owner;
1505 }
1506 if (owner &&
1507 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1508 VM_PAGE_WIRED(mem))) {
1509 /* less non-volatile bytes */
1510 ledger_debit(owner->ledger,
1511 task_ledgers.purgeable_nonvolatile,
1512 PAGE_SIZE);
1513 /* less footprint */
1514 ledger_debit(owner->ledger,
1515 task_ledgers.phys_footprint,
1516 PAGE_SIZE);
1517 } else if (owner &&
1518 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1519 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1520 assert(! VM_PAGE_WIRED(mem));
1521 /* less volatile bytes */
1522 ledger_debit(owner->ledger,
1523 task_ledgers.purgeable_volatile,
1524 PAGE_SIZE);
1525 }
1526 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1527 if (VM_PAGE_WIRED(mem)) {
1528 assert(vm_page_purgeable_wired_count > 0);
1529 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1530 } else {
1531 assert(vm_page_purgeable_count > 0);
1532 OSAddAtomic(-1, &vm_page_purgeable_count);
1533 }
1534 }
1535 if (mem->object->set_cache_attr == TRUE)
1536 pmap_set_cache_attributes(mem->phys_page, 0);
1537
1538 mem->tabled = FALSE;
1539 mem->object = VM_OBJECT_NULL;
1540 mem->offset = (vm_object_offset_t) -1;
1541 }
1542
1543
1544 /*
1545 * vm_page_lookup:
1546 *
1547 * Returns the page associated with the object/offset
1548 * pair specified; if none is found, VM_PAGE_NULL is returned.
1549 *
1550 * The object must be locked. No side effects.
1551 */
1552
1553 unsigned long vm_page_lookup_hint = 0;
1554 unsigned long vm_page_lookup_hint_next = 0;
1555 unsigned long vm_page_lookup_hint_prev = 0;
1556 unsigned long vm_page_lookup_hint_miss = 0;
1557 unsigned long vm_page_lookup_bucket_NULL = 0;
1558 unsigned long vm_page_lookup_miss = 0;
1559
1560
1561 vm_page_t
1562 vm_page_lookup(
1563 vm_object_t object,
1564 vm_object_offset_t offset)
1565 {
1566 vm_page_t mem;
1567 vm_page_bucket_t *bucket;
1568 queue_entry_t qe;
1569 lck_spin_t *bucket_lock;
1570 int hash_id;
1571
1572 vm_object_lock_assert_held(object);
1573 mem = object->memq_hint;
1574
1575 if (mem != VM_PAGE_NULL) {
1576 assert(mem->object == object);
1577
1578 if (mem->offset == offset) {
1579 vm_page_lookup_hint++;
1580 return mem;
1581 }
1582 qe = queue_next(&mem->listq);
1583
1584 if (! queue_end(&object->memq, qe)) {
1585 vm_page_t next_page;
1586
1587 next_page = (vm_page_t) qe;
1588 assert(next_page->object == object);
1589
1590 if (next_page->offset == offset) {
1591 vm_page_lookup_hint_next++;
1592 object->memq_hint = next_page; /* new hint */
1593 return next_page;
1594 }
1595 }
1596 qe = queue_prev(&mem->listq);
1597
1598 if (! queue_end(&object->memq, qe)) {
1599 vm_page_t prev_page;
1600
1601 prev_page = (vm_page_t) qe;
1602 assert(prev_page->object == object);
1603
1604 if (prev_page->offset == offset) {
1605 vm_page_lookup_hint_prev++;
1606 object->memq_hint = prev_page; /* new hint */
1607 return prev_page;
1608 }
1609 }
1610 }
1611 /*
1612 * Search the hash table for this object/offset pair
1613 */
1614 hash_id = vm_page_hash(object, offset);
1615 bucket = &vm_page_buckets[hash_id];
1616
1617 /*
1618 * since we hold the object lock, we are guaranteed that no
1619 * new pages can be inserted into this object... this in turn
1620 * guarantees that the page we're looking for can't exist
1621 * if the bucket it hashes to is currently NULL even when looked
1622 * at outside the scope of the hash bucket lock... this is a
1623 * really cheap optimization to avoid taking the lock
1624 */
1625 if (!bucket->page_list) {
1626 vm_page_lookup_bucket_NULL++;
1627
1628 return (VM_PAGE_NULL);
1629 }
1630 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1631
1632 lck_spin_lock(bucket_lock);
1633
1634 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1635 #if 0
1636 /*
1637 * we don't hold the page queue lock
1638 * so this check isn't safe to make
1639 */
1640 VM_PAGE_CHECK(mem);
1641 #endif
1642 if ((mem->object == object) && (mem->offset == offset))
1643 break;
1644 }
1645 lck_spin_unlock(bucket_lock);
1646
1647 if (mem != VM_PAGE_NULL) {
1648 if (object->memq_hint != VM_PAGE_NULL) {
1649 vm_page_lookup_hint_miss++;
1650 }
1651 assert(mem->object == object);
1652 object->memq_hint = mem;
1653 } else
1654 vm_page_lookup_miss++;
1655
1656 return(mem);
1657 }
1658
1659
1660 /*
1661 * vm_page_rename:
1662 *
1663 * Move the given memory entry from its
1664 * current object to the specified target object/offset.
1665 *
1666 * The object must be locked.
1667 */
1668 void
1669 vm_page_rename(
1670 register vm_page_t mem,
1671 register vm_object_t new_object,
1672 vm_object_offset_t new_offset,
1673 boolean_t encrypted_ok)
1674 {
1675 boolean_t internal_to_external, external_to_internal;
1676
1677 assert(mem->object != new_object);
1678
1679 /*
1680 * ENCRYPTED SWAP:
1681 * The encryption key is based on the page's memory object
1682 * (aka "pager") and paging offset. Moving the page to
1683 * another VM object changes its "pager" and "paging_offset"
1684 * so it has to be decrypted first, or we would lose the key.
1685 *
1686 * One exception is VM object collapsing, where we transfer pages
1687 * from one backing object to its parent object. This operation also
1688 * transfers the paging information, so the <pager,paging_offset> info
1689 * should remain consistent. The caller (vm_object_do_collapse())
1690 * sets "encrypted_ok" in this case.
1691 */
1692 if (!encrypted_ok && mem->encrypted) {
1693 panic("vm_page_rename: page %p is encrypted\n", mem);
1694 }
1695
1696 XPR(XPR_VM_PAGE,
1697 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1698 new_object, new_offset,
1699 mem, 0,0);
1700
1701 /*
1702 * Changes to mem->object require the page lock because
1703 * the pageout daemon uses that lock to get the object.
1704 */
1705 vm_page_lockspin_queues();
1706
1707 internal_to_external = FALSE;
1708 external_to_internal = FALSE;
1709
1710 if (mem->local) {
1711 /*
1712 * it's much easier to get the vm_page_pageable_xxx accounting correct
1713 * if we first move the page to the active queue... it's going to end
1714 * up there anyway, and we don't call vm_page_rename frequently enough
1715 * for this to matter.
1716 */
1717 VM_PAGE_QUEUES_REMOVE(mem);
1718 vm_page_activate(mem);
1719 }
1720 if (mem->active || mem->inactive || mem->speculative) {
1721 if (mem->object->internal && !new_object->internal) {
1722 internal_to_external = TRUE;
1723 }
1724 if (!mem->object->internal && new_object->internal) {
1725 external_to_internal = TRUE;
1726 }
1727 }
1728
1729 vm_page_remove(mem, TRUE);
1730 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1731
1732 if (internal_to_external) {
1733 vm_page_pageable_internal_count--;
1734 vm_page_pageable_external_count++;
1735 } else if (external_to_internal) {
1736 vm_page_pageable_external_count--;
1737 vm_page_pageable_internal_count++;
1738 }
1739
1740 vm_page_unlock_queues();
1741 }
1742
1743 /*
1744 * vm_page_init:
1745 *
1746 * Initialize the fields in a new page.
1747 * This takes a structure with random values and initializes it
1748 * so that it can be given to vm_page_release or vm_page_insert.
1749 */
1750 void
1751 vm_page_init(
1752 vm_page_t mem,
1753 ppnum_t phys_page,
1754 boolean_t lopage)
1755 {
1756 assert(phys_page);
1757
1758 #if DEBUG
1759 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1760 if (!(pmap_valid_page(phys_page))) {
1761 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1762 }
1763 }
1764 #endif
1765 *mem = vm_page_template;
1766 mem->phys_page = phys_page;
1767 #if 0
1768 /*
1769 * we're leaving this turned off for now... currently pages
1770 * come off the free list and are either immediately dirtied/referenced
1771 * due to zero-fill or COW faults, or are used to read or write files...
1772 * in the file I/O case, the UPL mechanism takes care of clearing
1773 * the state of the HW ref/mod bits in a somewhat fragile way.
1774 * Since we may change the way this works in the future (to toughen it up),
1775 * I'm leaving this as a reminder of where these bits could get cleared
1776 */
1777
1778 /*
1779 * make sure both the h/w referenced and modified bits are
1780 * clear at this point... we are especially dependent on
1781 * not finding a 'stale' h/w modified in a number of spots
1782 * once this page goes back into use
1783 */
1784 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1785 #endif
1786 mem->lopage = lopage;
1787 }
1788
1789 /*
1790 * vm_page_grab_fictitious:
1791 *
1792 * Remove a fictitious page from the free list.
1793 * Returns VM_PAGE_NULL if there are no free pages.
1794 */
1795 int c_vm_page_grab_fictitious = 0;
1796 int c_vm_page_grab_fictitious_failed = 0;
1797 int c_vm_page_release_fictitious = 0;
1798 int c_vm_page_more_fictitious = 0;
1799
1800 vm_page_t
1801 vm_page_grab_fictitious_common(
1802 ppnum_t phys_addr)
1803 {
1804 vm_page_t m;
1805
1806 if ((m = (vm_page_t)zget(vm_page_zone))) {
1807
1808 vm_page_init(m, phys_addr, FALSE);
1809 m->fictitious = TRUE;
1810
1811 c_vm_page_grab_fictitious++;
1812 } else
1813 c_vm_page_grab_fictitious_failed++;
1814
1815 return m;
1816 }
1817
1818 vm_page_t
1819 vm_page_grab_fictitious(void)
1820 {
1821 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1822 }
1823
1824 vm_page_t
1825 vm_page_grab_guard(void)
1826 {
1827 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1828 }
1829
1830
1831 /*
1832 * vm_page_release_fictitious:
1833 *
1834 * Release a fictitious page to the zone pool
1835 */
1836 void
1837 vm_page_release_fictitious(
1838 vm_page_t m)
1839 {
1840 assert(!m->free);
1841 assert(m->fictitious);
1842 assert(m->phys_page == vm_page_fictitious_addr ||
1843 m->phys_page == vm_page_guard_addr);
1844
1845 c_vm_page_release_fictitious++;
1846
1847 zfree(vm_page_zone, m);
1848 }
1849
1850 /*
1851 * vm_page_more_fictitious:
1852 *
1853 * Add more fictitious pages to the zone.
1854 * Allowed to block. This routine is closely tied to
1855 * the zones code, for several reasons:
1856 * 1. we need to carve some page structures out of physical
1857 * memory before zones work, so they _cannot_ come from
1858 * the zone_map.
1859 * 2. the zone needs to be collectable in order to prevent
1860 * growth without bound. These structures are used by
1861 * the device pager (by the hundreds and thousands), as
1862 * private pages for pageout, and as blocking pages for
1863 * pagein. Temporary bursts in demand should not result in
1864 * permanent allocation of a resource.
1865 * 3. To smooth allocation humps, we allocate single pages
1866 * with kernel_memory_allocate(), and cram them into the
1867 * zone.
1868 */
1869
1870 void vm_page_more_fictitious(void)
1871 {
1872 vm_offset_t addr;
1873 kern_return_t retval;
1874
1875 c_vm_page_more_fictitious++;
1876
1877 /*
1878 * Allocate a single page from the zone_map. Do not wait if no physical
1879 * pages are immediately available, and do not zero the space. We need
1880 * our own blocking lock here to prevent having multiple,
1881 * simultaneous requests from piling up on the zone_map lock. Exactly
1882 * one (of our) threads should be potentially waiting on the map lock.
1883 * If the winner is not vm-privileged, then the page allocation will fail,
1884 * and it will temporarily block here in vm_page_wait().
1885 */
1886 lck_mtx_lock(&vm_page_alloc_lock);
1887 /*
1888 * If another thread allocated space, just bail out now.
1889 */
1890 if (zone_free_count(vm_page_zone) > 5) {
1891 /*
1892 * The number "5" is a small number that is larger than the
1893 * number of fictitious pages that any single caller will
1894 * attempt to allocate. Otherwise, a thread will attempt to
1895 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1896 * release all of the resources and locks already acquired,
1897 * and then call this routine. This routine finds the pages
1898 * that the caller released, and so fails to allocate new space.
1899 * The process would repeat indefinitely. The largest known number
1900 * of fictitious pages required in this manner is 2. 5 is
1901 * simply a somewhat larger number.
1902 */
1903 lck_mtx_unlock(&vm_page_alloc_lock);
1904 return;
1905 }
1906
1907 retval = kernel_memory_allocate(zone_map,
1908 &addr, PAGE_SIZE, VM_PROT_ALL,
1909 KMA_KOBJECT|KMA_NOPAGEWAIT);
1910 if (retval != KERN_SUCCESS) {
1911 /*
1912 * No page was available. Drop the
1913 * lock to give another thread a chance at it, and
1914 * wait for the pageout daemon to make progress.
1915 */
1916 lck_mtx_unlock(&vm_page_alloc_lock);
1917 vm_page_wait(THREAD_UNINT);
1918 return;
1919 }
1920
1921 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1922 OSAddAtomic64(1, &(vm_page_zone->page_count));
1923
1924 zcram(vm_page_zone, addr, PAGE_SIZE);
1925
1926 lck_mtx_unlock(&vm_page_alloc_lock);
1927 }
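
/*
 * Illustrative only: a minimal sketch (not compiled in) of the caller
 * retry pattern that the comment in vm_page_more_fictitious() assumes.
 * A real caller must drop its own locks before calling a routine that
 * may block.  "my_grab_fictitious_retry" is a hypothetical helper, not
 * part of this file.
 */
#if 0
static vm_page_t
my_grab_fictitious_retry(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab_fictitious();
		if (m != VM_PAGE_NULL)
			return m;
		/*
		 * zone was empty: replenish it (may block) and retry
		 */
		vm_page_more_fictitious();
	}
}
#endif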
1928
1929
1930 /*
1931 * vm_pool_low():
1932 *
1933 * Return true if it is not likely that a non-vm_privileged thread
1934 * can get memory without blocking. Advisory only, since the
1935 * situation may change under us.
1936 */
1937 int
1938 vm_pool_low(void)
1939 {
1940 /* No locking, at worst we will fib. */
1941 return( vm_page_free_count <= vm_page_free_reserved );
1942 }
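
/*
 * Illustrative only: a minimal sketch (not compiled in) of how an
 * allocation path that must not sleep might consult vm_pool_low()
 * before committing to work it cannot finish without blocking.  The
 * check is purely advisory, as noted above.  "my_try_nonblocking_grab"
 * is a hypothetical helper, not part of this file.
 */
#if 0
static kern_return_t
my_try_nonblocking_grab(vm_page_t *mp)
{
	if (vm_pool_low())
		return KERN_RESOURCE_SHORTAGE;	/* likely to block; bail early */

	*mp = vm_page_grab();
	if (*mp == VM_PAGE_NULL)
		return KERN_RESOURCE_SHORTAGE;	/* advisory check can still be wrong */

	return KERN_SUCCESS;
}
#endif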
1943
1944
1945
1946 /*
1947 * this is an interface to support bring-up of drivers
1948 * on platforms with physical memory > 4G...
1949 */
1950 int vm_himemory_mode = 2;
1951
1952
1953 /*
1954 * this interface exists to support hardware controllers
1955 * incapable of generating DMAs with more than 32 bits
1956 * of address on platforms with physical memory > 4G...
1957 */
1958 unsigned int vm_lopages_allocated_q = 0;
1959 unsigned int vm_lopages_allocated_cpm_success = 0;
1960 unsigned int vm_lopages_allocated_cpm_failed = 0;
1961 queue_head_t vm_lopage_queue_free;
1962
1963 vm_page_t
1964 vm_page_grablo(void)
1965 {
1966 vm_page_t mem;
1967
1968 if (vm_lopage_needed == FALSE)
1969 return (vm_page_grab());
1970
1971 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1972
1973 if ( !queue_empty(&vm_lopage_queue_free)) {
1974 queue_remove_first(&vm_lopage_queue_free,
1975 mem,
1976 vm_page_t,
1977 pageq);
1978 assert(vm_lopage_free_count);
1979
1980 vm_lopage_free_count--;
1981 vm_lopages_allocated_q++;
1982
1983 if (vm_lopage_free_count < vm_lopage_lowater)
1984 vm_lopage_refill = TRUE;
1985
1986 lck_mtx_unlock(&vm_page_queue_free_lock);
1987 } else {
1988 lck_mtx_unlock(&vm_page_queue_free_lock);
1989
1990 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1991
1992 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1993 vm_lopages_allocated_cpm_failed++;
1994 lck_mtx_unlock(&vm_page_queue_free_lock);
1995
1996 return (VM_PAGE_NULL);
1997 }
1998 mem->busy = TRUE;
1999
2000 vm_page_lockspin_queues();
2001
2002 mem->gobbled = FALSE;
2003 vm_page_gobble_count--;
2004 vm_page_wire_count--;
2005
2006 vm_lopages_allocated_cpm_success++;
2007 vm_page_unlock_queues();
2008 }
2009 assert(mem->busy);
2010 assert(!mem->free);
2011 assert(!mem->pmapped);
2012 assert(!mem->wpmapped);
2013 assert(!pmap_is_noencrypt(mem->phys_page));
2014
2015 mem->pageq.next = NULL;
2016 mem->pageq.prev = NULL;
2017
2018 return (mem);
2019 }
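
/*
 * Illustrative only: a minimal sketch (not compiled in) of a caller
 * that needs a page reachable by a 32-bit DMA engine.  When
 * vm_lopage_needed is set, vm_page_grablo() returns a page whose
 * physical address lies below 4G; otherwise it degenerates to an
 * ordinary vm_page_grab().  "my_grab_dma32_page" is a hypothetical
 * helper, not part of this file.
 */
#if 0
static vm_page_t
my_grab_dma32_page(void)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m == VM_PAGE_NULL)
		return VM_PAGE_NULL;		/* no low page available */

	if (vm_lopage_needed)
		assert(m->phys_page <= atop(0xffffffff));

	return m;
}
#endif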
2020
2021
2022 /*
2023 * vm_page_grab:
2024 *
2025 * first try to grab a page from the per-cpu free list...
2026 * this must be done while pre-emption is disabled... if
2027 * a page is available, we're done...
2028 * if no page is available, grab the vm_page_queue_free_lock
2029 * and see if current number of free pages would allow us
2030 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2031 * if there are pages available, disable preemption and
2032 * recheck the state of the per-cpu free list... we could
2033 * have been preempted and moved to a different cpu, or
2034 * some other thread could have re-filled it... if still
2035 * empty, figure out how many pages we can steal from the
2036 * global free queue and move to the per-cpu queue...
2037 * return one of these pages when done... only wake up the
2038 * pageout_scan thread if we moved pages from the global
2039 * list... no need for the wakeup if we've satisfied the
2040 * request from the per-cpu queue.
2041 */
2042
2043
2044 vm_page_t
2045 vm_page_grab( void )
2046 {
2047 vm_page_t mem;
2048
2049
2050 disable_preemption();
2051
2052 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2053 return_page_from_cpu_list:
2054 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2055 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2056
2057 enable_preemption();
2058 mem->pageq.next = NULL;
2059
2060 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2061 assert(mem->tabled == FALSE);
2062 assert(mem->object == VM_OBJECT_NULL);
2063 assert(!mem->laundry);
2064 assert(!mem->free);
2065 assert(pmap_verify_free(mem->phys_page));
2066 assert(mem->busy);
2067 assert(!mem->encrypted);
2068 assert(!mem->pmapped);
2069 assert(!mem->wpmapped);
2070 assert(!mem->active);
2071 assert(!mem->inactive);
2072 assert(!mem->throttled);
2073 assert(!mem->speculative);
2074 assert(!pmap_is_noencrypt(mem->phys_page));
2075
2076 return mem;
2077 }
2078 enable_preemption();
2079
2080
2081 /*
2082 * Optionally produce warnings if the wire or gobble
2083 * counts exceed some threshold.
2084 */
2085 #if VM_PAGE_WIRE_COUNT_WARNING
2086 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2087 printf("mk: vm_page_grab(): high wired page count of %d\n",
2088 vm_page_wire_count);
2089 }
2090 #endif
2091 #if VM_PAGE_GOBBLE_COUNT_WARNING
2092 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2093 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2094 vm_page_gobble_count);
2095 }
2096 #endif
2097 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2098
2099 /*
2100 * Only let privileged threads (involved in pageout)
2101 * dip into the reserved pool.
2102 */
2103 if ((vm_page_free_count < vm_page_free_reserved) &&
2104 !(current_thread()->options & TH_OPT_VMPRIV)) {
2105 lck_mtx_unlock(&vm_page_queue_free_lock);
2106 mem = VM_PAGE_NULL;
2107 }
2108 else {
2109 vm_page_t head;
2110 vm_page_t tail;
2111 unsigned int pages_to_steal;
2112 unsigned int color;
2113
2114 while ( vm_page_free_count == 0 ) {
2115
2116 lck_mtx_unlock(&vm_page_queue_free_lock);
2117 /*
2118 * must be a privileged thread to be
2119 * in this state since a non-privileged
2120 * thread would have bailed if we were
2121 * under the vm_page_free_reserved mark
2122 */
2123 VM_PAGE_WAIT();
2124 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2125 }
2126
2127 disable_preemption();
2128
2129 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2130 lck_mtx_unlock(&vm_page_queue_free_lock);
2131
2132 /*
2133 * we got preempted and moved to another processor
2134 * or we got preempted and someone else ran and filled the cache
2135 */
2136 goto return_page_from_cpu_list;
2137 }
2138 if (vm_page_free_count <= vm_page_free_reserved)
2139 pages_to_steal = 1;
2140 else {
2141 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2142 pages_to_steal = vm_free_magazine_refill_limit;
2143 else
2144 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2145 }
2146 color = PROCESSOR_DATA(current_processor(), start_color);
2147 head = tail = NULL;
2148
2149 vm_page_free_count -= pages_to_steal;
2150
2151 while (pages_to_steal--) {
2152
2153 while (queue_empty(&vm_page_queue_free[color]))
2154 color = (color + 1) & vm_color_mask;
2155
2156 queue_remove_first(&vm_page_queue_free[color],
2157 mem,
2158 vm_page_t,
2159 pageq);
2160 mem->pageq.next = NULL;
2161 mem->pageq.prev = NULL;
2162
2163 assert(!mem->active);
2164 assert(!mem->inactive);
2165 assert(!mem->throttled);
2166 assert(!mem->speculative);
2167
2168 color = (color + 1) & vm_color_mask;
2169
2170 if (head == NULL)
2171 head = mem;
2172 else
2173 tail->pageq.next = (queue_t)mem;
2174 tail = mem;
2175
2176 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2177 assert(mem->tabled == FALSE);
2178 assert(mem->object == VM_OBJECT_NULL);
2179 assert(!mem->laundry);
2180 assert(mem->free);
2181 mem->free = FALSE;
2182
2183 assert(pmap_verify_free(mem->phys_page));
2184 assert(mem->busy);
2185 assert(!mem->free);
2186 assert(!mem->encrypted);
2187 assert(!mem->pmapped);
2188 assert(!mem->wpmapped);
2189 assert(!pmap_is_noencrypt(mem->phys_page));
2190 }
2191 lck_mtx_unlock(&vm_page_queue_free_lock);
2192
2193 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2194 PROCESSOR_DATA(current_processor(), start_color) = color;
2195
2196 /*
2197 * satisfy this request
2198 */
2199 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2200 mem = head;
2201 mem->pageq.next = NULL;
2202
2203 enable_preemption();
2204 }
2205 /*
2206 * Decide if we should poke the pageout daemon.
2207 * We do this if the free count is less than the low
2208 * water mark, or if the free count is less than the high
2209 * water mark (but above the low water mark) and the inactive
2210 * count is less than its target.
2211 *
2212 * We don't have the counts locked ... if they change a little,
2213 * it doesn't really matter.
2214 */
2215 if ((vm_page_free_count < vm_page_free_min) ||
2216 ((vm_page_free_count < vm_page_free_target) &&
2217 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2218 thread_wakeup((event_t) &vm_page_free_wanted);
2219
2220 VM_CHECK_MEMORYSTATUS;
2221
2222 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2223
2224 return mem;
2225 }
2226
2227 /*
2228 * vm_page_release:
2229 *
2230 * Return a page to the free list.
2231 */
2232
2233 void
2234 vm_page_release(
2235 register vm_page_t mem)
2236 {
2237 unsigned int color;
2238 int need_wakeup = 0;
2239 int need_priv_wakeup = 0;
2240
2241
2242 assert(!mem->private && !mem->fictitious);
2243 if (vm_page_free_verify) {
2244 assert(pmap_verify_free(mem->phys_page));
2245 }
2246 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2247
2248 pmap_clear_noencrypt(mem->phys_page);
2249
2250 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2251 #if DEBUG
2252 if (mem->free)
2253 panic("vm_page_release");
2254 #endif
2255
2256 assert(mem->busy);
2257 assert(!mem->laundry);
2258 assert(mem->object == VM_OBJECT_NULL);
2259 assert(mem->pageq.next == NULL &&
2260 mem->pageq.prev == NULL);
2261 assert(mem->listq.next == NULL &&
2262 mem->listq.prev == NULL);
2263
2264 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2265 vm_lopage_free_count < vm_lopage_free_limit &&
2266 mem->phys_page < max_valid_low_ppnum) {
2267 /*
2268 * this exists to support hardware controllers
2269 * incapable of generating DMAs with more than 32 bits
2270 * of address on platforms with physical memory > 4G...
2271 */
2272 queue_enter_first(&vm_lopage_queue_free,
2273 mem,
2274 vm_page_t,
2275 pageq);
2276 vm_lopage_free_count++;
2277
2278 if (vm_lopage_free_count >= vm_lopage_free_limit)
2279 vm_lopage_refill = FALSE;
2280
2281 mem->lopage = TRUE;
2282 } else {
2283 mem->lopage = FALSE;
2284 mem->free = TRUE;
2285
2286 color = mem->phys_page & vm_color_mask;
2287 queue_enter_first(&vm_page_queue_free[color],
2288 mem,
2289 vm_page_t,
2290 pageq);
2291 vm_page_free_count++;
2292 /*
2293 * Check if we should wake up someone waiting for a page.
2294 * But don't bother waking them unless they can allocate.
2295 *
2296 * We wake up only one thread, to prevent starvation.
2297 * Because the scheduling system handles wait queues FIFO,
2298 * if we wake up all waiting threads, one greedy thread
2299 * can starve multiple well-behaved threads. When the threads
2300 * all wake up, the greedy thread runs first, grabs the page,
2301 * and waits for another page. It will be the first to run
2302 * when the next page is freed.
2303 *
2304 * However, there is a slight danger here.
2305 * The thread we wake might not use the free page.
2306 * Then the other threads could wait indefinitely
2307 * while the page goes unused. To forestall this,
2308 * the pageout daemon will keep making free pages
2309 * as long as vm_page_free_wanted is non-zero.
2310 */
2311
2312 assert(vm_page_free_count > 0);
2313 if (vm_page_free_wanted_privileged > 0) {
2314 vm_page_free_wanted_privileged--;
2315 need_priv_wakeup = 1;
2316 } else if (vm_page_free_wanted > 0 &&
2317 vm_page_free_count > vm_page_free_reserved) {
2318 vm_page_free_wanted--;
2319 need_wakeup = 1;
2320 }
2321 }
2322 lck_mtx_unlock(&vm_page_queue_free_lock);
2323
2324 if (need_priv_wakeup)
2325 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2326 else if (need_wakeup)
2327 thread_wakeup_one((event_t) &vm_page_free_count);
2328
2329 VM_CHECK_MEMORYSTATUS;
2330 }
2331
2332 /*
2333 * This version of vm_page_release() is used only at startup
2334 * when we are single-threaded and pages are being released
2335 * for the first time. Hence, no locking is needed and redundant checks are skipped.
2336 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2337 */
2338 void
2339 vm_page_release_startup(
2340 register vm_page_t mem)
2341 {
2342 queue_t queue_free;
2343
2344 if (vm_lopage_free_count < vm_lopage_free_limit &&
2345 mem->phys_page < max_valid_low_ppnum) {
2346 mem->lopage = TRUE;
2347 vm_lopage_free_count++;
2348 queue_free = &vm_lopage_queue_free;
2349 } else {
2350 mem->lopage = FALSE;
2351 mem->free = TRUE;
2352 vm_page_free_count++;
2353 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2354 }
2355 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2356 }
2357
2358 /*
2359 * vm_page_wait:
2360 *
2361 * Wait for a page to become available.
2362 * If there are plenty of free pages, then we don't sleep.
2363 *
2364 * Returns:
2365 * TRUE: There may be another page, try again
2366 * FALSE: We were interrupted out of our wait, don't try again
2367 */
2368
2369 boolean_t
2370 vm_page_wait(
2371 int interruptible )
2372 {
2373 /*
2374 * We can't use vm_page_free_reserved to make this
2375 * determination. Consider: some thread might
2376 * need to allocate two pages. The first allocation
2377 * succeeds, the second fails. After the first page is freed,
2378 * a call to vm_page_wait must really block.
2379 */
2380 kern_return_t wait_result;
2381 int need_wakeup = 0;
2382 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2383
2384 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2385
2386 if (is_privileged && vm_page_free_count) {
2387 lck_mtx_unlock(&vm_page_queue_free_lock);
2388 return TRUE;
2389 }
2390 if (vm_page_free_count < vm_page_free_target) {
2391
2392 if (is_privileged) {
2393 if (vm_page_free_wanted_privileged++ == 0)
2394 need_wakeup = 1;
2395 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2396 } else {
2397 if (vm_page_free_wanted++ == 0)
2398 need_wakeup = 1;
2399 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2400 }
2401 lck_mtx_unlock(&vm_page_queue_free_lock);
2402 counter(c_vm_page_wait_block++);
2403
2404 if (need_wakeup)
2405 thread_wakeup((event_t)&vm_page_free_wanted);
2406
2407 if (wait_result == THREAD_WAITING) {
2408 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2409 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2410 wait_result = thread_block(THREAD_CONTINUE_NULL);
2411 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2412 }
2413
2414 return(wait_result == THREAD_AWAKENED);
2415 } else {
2416 lck_mtx_unlock(&vm_page_queue_free_lock);
2417 return TRUE;
2418 }
2419 }
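
/*
 * Illustrative only: a minimal sketch (not compiled in) of the
 * grab-then-wait retry loop that callers of vm_page_grab() and
 * vm_page_wait() typically use (compare the fallback path in
 * vm_page_part_zero_fill() below).  TRUE from vm_page_wait() means
 * "a page may now be available, try again"; FALSE means the wait was
 * interrupted.  "my_grab_one_page" is a hypothetical helper, not part
 * of this file.
 */
#if 0
static kern_return_t
my_grab_one_page(vm_page_t *mp)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab();
		if (m != VM_PAGE_NULL)
			break;				/* got a page */
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return KERN_ABORTED;		/* interrupted out of the wait */
	}
	*mp = m;
	return KERN_SUCCESS;
}
#endif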
2420
2421 /*
2422 * vm_page_alloc:
2423 *
2424 * Allocate and return a memory cell associated
2425 * with this VM object/offset pair.
2426 *
2427 * Object must be locked.
2428 */
2429
2430 vm_page_t
2431 vm_page_alloc(
2432 vm_object_t object,
2433 vm_object_offset_t offset)
2434 {
2435 register vm_page_t mem;
2436
2437 vm_object_lock_assert_exclusive(object);
2438 mem = vm_page_grab();
2439 if (mem == VM_PAGE_NULL)
2440 return VM_PAGE_NULL;
2441
2442 vm_page_insert(mem, object, offset);
2443
2444 return(mem);
2445 }
2446
2447 vm_page_t
2448 vm_page_alloclo(
2449 vm_object_t object,
2450 vm_object_offset_t offset)
2451 {
2452 register vm_page_t mem;
2453
2454 vm_object_lock_assert_exclusive(object);
2455 mem = vm_page_grablo();
2456 if (mem == VM_PAGE_NULL)
2457 return VM_PAGE_NULL;
2458
2459 vm_page_insert(mem, object, offset);
2460
2461 return(mem);
2462 }
2463
2464
2465 /*
2466 * vm_page_alloc_guard:
2467 *
2468 * Allocate a fictitious page which will be used
2469 * as a guard page. The page will be inserted into
2470 * the object and returned to the caller.
2471 */
2472
2473 vm_page_t
2474 vm_page_alloc_guard(
2475 vm_object_t object,
2476 vm_object_offset_t offset)
2477 {
2478 register vm_page_t mem;
2479
2480 vm_object_lock_assert_exclusive(object);
2481 mem = vm_page_grab_guard();
2482 if (mem == VM_PAGE_NULL)
2483 return VM_PAGE_NULL;
2484
2485 vm_page_insert(mem, object, offset);
2486
2487 return(mem);
2488 }
2489
2490
2491 counter(unsigned int c_laundry_pages_freed = 0;)
2492
2493 /*
2494 * vm_page_free_prepare:
2495 *
2496 * Removes page from any queue it may be on
2497 * and disassociates it from its VM object.
2498 *
2499 * Object and page queues must be locked prior to entry.
2500 */
2501 static void
2502 vm_page_free_prepare(
2503 vm_page_t mem)
2504 {
2505 vm_page_free_prepare_queues(mem);
2506 vm_page_free_prepare_object(mem, TRUE);
2507 }
2508
2509
2510 void
2511 vm_page_free_prepare_queues(
2512 vm_page_t mem)
2513 {
2514 VM_PAGE_CHECK(mem);
2515 assert(!mem->free);
2516 assert(!mem->cleaning);
2517
2518 #if MACH_ASSERT || DEBUG
2519 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2520 if (mem->free)
2521 panic("vm_page_free: freeing page on free list\n");
2522 #endif /* MACH_ASSERT || DEBUG */
2523 if (mem->object) {
2524 vm_object_lock_assert_exclusive(mem->object);
2525 }
2526 if (mem->laundry) {
2527 /*
2528 * We may have to free a page while it's being laundered
2529 * if we lost its pager (due to a forced unmount, for example).
2530 * We need to call vm_pageout_steal_laundry() before removing
2531 * the page from its VM object, so that we can remove it
2532 * from its pageout queue and adjust the laundry accounting
2533 */
2534 vm_pageout_steal_laundry(mem, TRUE);
2535 counter(++c_laundry_pages_freed);
2536 }
2537
2538 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2539
2540 if (VM_PAGE_WIRED(mem)) {
2541 if (mem->object) {
2542 assert(mem->object->wired_page_count > 0);
2543 mem->object->wired_page_count--;
2544 assert(mem->object->resident_page_count >=
2545 mem->object->wired_page_count);
2546
2547 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2548 OSAddAtomic(+1, &vm_page_purgeable_count);
2549 assert(vm_page_purgeable_wired_count > 0);
2550 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2551 }
2552 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2553 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2554 mem->object->vo_purgeable_owner != TASK_NULL) {
2555 task_t owner;
2556
2557 owner = mem->object->vo_purgeable_owner;
2558 /*
2559 * While wired, this page was accounted
2560 * as "non-volatile" but it should now
2561 * be accounted as "volatile".
2562 */
2563 /* one less "non-volatile"... */
2564 ledger_debit(owner->ledger,
2565 task_ledgers.purgeable_nonvolatile,
2566 PAGE_SIZE);
2567 /* ... and "phys_footprint" */
2568 ledger_debit(owner->ledger,
2569 task_ledgers.phys_footprint,
2570 PAGE_SIZE);
2571 /* one more "volatile" */
2572 ledger_credit(owner->ledger,
2573 task_ledgers.purgeable_volatile,
2574 PAGE_SIZE);
2575 }
2576 }
2577 if (!mem->private && !mem->fictitious)
2578 vm_page_wire_count--;
2579 mem->wire_count = 0;
2580 assert(!mem->gobbled);
2581 } else if (mem->gobbled) {
2582 if (!mem->private && !mem->fictitious)
2583 vm_page_wire_count--;
2584 vm_page_gobble_count--;
2585 }
2586 }
2587
2588
2589 void
2590 vm_page_free_prepare_object(
2591 vm_page_t mem,
2592 boolean_t remove_from_hash)
2593 {
2594 if (mem->tabled)
2595 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2596
2597 PAGE_WAKEUP(mem); /* clears wanted */
2598
2599 if (mem->private) {
2600 mem->private = FALSE;
2601 mem->fictitious = TRUE;
2602 mem->phys_page = vm_page_fictitious_addr;
2603 }
2604 if ( !mem->fictitious) {
2605 vm_page_init(mem, mem->phys_page, mem->lopage);
2606 }
2607 }
2608
2609
2610 /*
2611 * vm_page_free:
2612 *
2613 * Returns the given page to the free list,
2614 * disassociating it from its VM object.
2615 *
2616 * Object and page queues must be locked prior to entry.
2617 */
2618 void
2619 vm_page_free(
2620 vm_page_t mem)
2621 {
2622 vm_page_free_prepare(mem);
2623
2624 if (mem->fictitious) {
2625 vm_page_release_fictitious(mem);
2626 } else {
2627 vm_page_release(mem);
2628 }
2629 }
2630
2631
2632 void
2633 vm_page_free_unlocked(
2634 vm_page_t mem,
2635 boolean_t remove_from_hash)
2636 {
2637 vm_page_lockspin_queues();
2638 vm_page_free_prepare_queues(mem);
2639 vm_page_unlock_queues();
2640
2641 vm_page_free_prepare_object(mem, remove_from_hash);
2642
2643 if (mem->fictitious) {
2644 vm_page_release_fictitious(mem);
2645 } else {
2646 vm_page_release(mem);
2647 }
2648 }
2649
2650
2651 /*
2652 * Free a list of pages. The list can be up to several hundred pages,
2653 * as batched up by vm_pageout_scan().
2654 * The big win is not having to take the free list lock once
2655 * per page.
2656 */
2657 void
2658 vm_page_free_list(
2659 vm_page_t freeq,
2660 boolean_t prepare_object)
2661 {
2662 vm_page_t mem;
2663 vm_page_t nxt;
2664 vm_page_t local_freeq;
2665 int pg_count;
2666
2667 while (freeq) {
2668
2669 pg_count = 0;
2670 local_freeq = VM_PAGE_NULL;
2671 mem = freeq;
2672
2673 /*
2674 * break up the processing into smaller chunks so
2675 * that we can 'pipeline' the pages onto the
2676 * free list w/o introducing too much
2677 * contention on the global free queue lock
2678 */
2679 while (mem && pg_count < 64) {
2680
2681 assert(!mem->inactive);
2682 assert(!mem->active);
2683 assert(!mem->throttled);
2684 assert(!mem->free);
2685 assert(!mem->speculative);
2686 assert(!VM_PAGE_WIRED(mem));
2687 assert(mem->pageq.prev == NULL);
2688
2689 nxt = (vm_page_t)(mem->pageq.next);
2690
2691 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2692 assert(pmap_verify_free(mem->phys_page));
2693 }
2694 if (prepare_object == TRUE)
2695 vm_page_free_prepare_object(mem, TRUE);
2696
2697 if (!mem->fictitious) {
2698 assert(mem->busy);
2699
2700 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2701 vm_lopage_free_count < vm_lopage_free_limit &&
2702 mem->phys_page < max_valid_low_ppnum) {
2703 mem->pageq.next = NULL;
2704 vm_page_release(mem);
2705 } else {
2706 /*
2707 * IMPORTANT: we can't set the page "free" here
2708 * because that would make the page eligible for
2709 * a physically-contiguous allocation (see
2710 * vm_page_find_contiguous()) right away (we don't
2711 * hold the vm_page_queue_free lock). That would
2712 * cause trouble because the page is not actually
2713 * in the free queue yet...
2714 */
2715 mem->pageq.next = (queue_entry_t)local_freeq;
2716 local_freeq = mem;
2717 pg_count++;
2718
2719 pmap_clear_noencrypt(mem->phys_page);
2720 }
2721 } else {
2722 assert(mem->phys_page == vm_page_fictitious_addr ||
2723 mem->phys_page == vm_page_guard_addr);
2724 vm_page_release_fictitious(mem);
2725 }
2726 mem = nxt;
2727 }
2728 freeq = mem;
2729
2730 if ( (mem = local_freeq) ) {
2731 unsigned int avail_free_count;
2732 unsigned int need_wakeup = 0;
2733 unsigned int need_priv_wakeup = 0;
2734
2735 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2736
2737 while (mem) {
2738 int color;
2739
2740 nxt = (vm_page_t)(mem->pageq.next);
2741
2742 assert(!mem->free);
2743 assert(mem->busy);
2744 mem->free = TRUE;
2745
2746 color = mem->phys_page & vm_color_mask;
2747 queue_enter_first(&vm_page_queue_free[color],
2748 mem,
2749 vm_page_t,
2750 pageq);
2751 mem = nxt;
2752 }
2753 vm_page_free_count += pg_count;
2754 avail_free_count = vm_page_free_count;
2755
2756 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2757
2758 if (avail_free_count < vm_page_free_wanted_privileged) {
2759 need_priv_wakeup = avail_free_count;
2760 vm_page_free_wanted_privileged -= avail_free_count;
2761 avail_free_count = 0;
2762 } else {
2763 need_priv_wakeup = vm_page_free_wanted_privileged;
2764 avail_free_count -= vm_page_free_wanted_privileged;
2765 vm_page_free_wanted_privileged = 0;
2766 }
2767 }
2768 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2769 unsigned int available_pages;
2770
2771 available_pages = avail_free_count - vm_page_free_reserved;
2772
2773 if (available_pages >= vm_page_free_wanted) {
2774 need_wakeup = vm_page_free_wanted;
2775 vm_page_free_wanted = 0;
2776 } else {
2777 need_wakeup = available_pages;
2778 vm_page_free_wanted -= available_pages;
2779 }
2780 }
2781 lck_mtx_unlock(&vm_page_queue_free_lock);
2782
2783 if (need_priv_wakeup != 0) {
2784 /*
2785 * There shouldn't be that many VM-privileged threads,
2786 * so let's wake them all up, even if we don't quite
2787 * have enough pages to satisfy them all.
2788 */
2789 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2790 }
2791 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2792 /*
2793 * We don't expect to have any more waiters
2794 * after this, so let's wake them all up at
2795 * once.
2796 */
2797 thread_wakeup((event_t) &vm_page_free_count);
2798 } else for (; need_wakeup != 0; need_wakeup--) {
2799 /*
2800 * Wake up one waiter per page we just released.
2801 */
2802 thread_wakeup_one((event_t) &vm_page_free_count);
2803 }
2804
2805 VM_CHECK_MEMORYSTATUS;
2806 }
2807 }
2808 }
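
/*
 * Illustrative only: a minimal sketch (not compiled in) of how a caller
 * such as vm_pageout_scan() batches pages for vm_page_free_list().
 * Pages are chained through pageq.next (pageq.prev left NULL) and the
 * whole chain is freed with one call, so the free-list lock is taken
 * once per batch rather than once per page.  Each page is assumed to
 * have already been taken off all paging queues (e.g. via
 * vm_page_free_prepare_queues()) and to be unwired.
 * "my_free_page_batch" is a hypothetical helper, not part of this file.
 */
#if 0
static void
my_free_page_batch(vm_page_t *pages, int npages)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < npages; i++) {
		vm_page_t m = pages[i];

		m->pageq.next = (queue_entry_t) local_freeq;
		m->pageq.prev = NULL;
		local_freeq = m;
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);
}
#endif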
2809
2810
2811 /*
2812 * vm_page_wire:
2813 *
2814 * Mark this page as wired down by yet
2815 * another map, removing it from paging queues
2816 * as necessary.
2817 *
2818 * The page's object and the page queues must be locked.
2819 */
2820 void
2821 vm_page_wire(
2822 register vm_page_t mem)
2823 {
2824
2825 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2826
2827 VM_PAGE_CHECK(mem);
2828 if (mem->object) {
2829 vm_object_lock_assert_exclusive(mem->object);
2830 } else {
2831 /*
2832 * In theory, the page should be in an object before it
2833 * gets wired, since we need to hold the object lock
2834 * to update some fields in the page structure.
2835 * However, some code (i386 pmap, for example) might want
2836 * to wire a page before it gets inserted into an object.
2837 * That's somewhat OK, as long as nobody else can get to
2838 * that page and update it at the same time.
2839 */
2840 }
2841 #if DEBUG
2842 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2843 #endif
2844 if ( !VM_PAGE_WIRED(mem)) {
2845
2846 if (mem->pageout_queue) {
2847 mem->pageout = FALSE;
2848 vm_pageout_throttle_up(mem);
2849 }
2850 VM_PAGE_QUEUES_REMOVE(mem);
2851
2852 if (mem->object) {
2853 mem->object->wired_page_count++;
2854 assert(mem->object->resident_page_count >=
2855 mem->object->wired_page_count);
2856 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2857 assert(vm_page_purgeable_count > 0);
2858 OSAddAtomic(-1, &vm_page_purgeable_count);
2859 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2860 }
2861 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2862 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2863 mem->object->vo_purgeable_owner != TASK_NULL) {
2864 task_t owner;
2865
2866 owner = mem->object->vo_purgeable_owner;
2867 /* less volatile bytes */
2868 ledger_debit(owner->ledger,
2869 task_ledgers.purgeable_volatile,
2870 PAGE_SIZE);
2871 /* more not-quite-volatile bytes */
2872 ledger_credit(owner->ledger,
2873 task_ledgers.purgeable_nonvolatile,
2874 PAGE_SIZE);
2875 /* more footprint */
2876 ledger_credit(owner->ledger,
2877 task_ledgers.phys_footprint,
2878 PAGE_SIZE);
2879 }
2880 if (mem->object->all_reusable) {
2881 /*
2882 * Wired pages are not counted as "re-usable"
2883 * in "all_reusable" VM objects, so nothing
2884 * to do here.
2885 */
2886 } else if (mem->reusable) {
2887 /*
2888 * This page is not "re-usable" when it's
2889 * wired, so adjust its state and the
2890 * accounting.
2891 */
2892 vm_object_reuse_pages(mem->object,
2893 mem->offset,
2894 mem->offset+PAGE_SIZE_64,
2895 FALSE);
2896 }
2897 }
2898 assert(!mem->reusable);
2899
2900 if (!mem->private && !mem->fictitious && !mem->gobbled)
2901 vm_page_wire_count++;
2902 if (mem->gobbled)
2903 vm_page_gobble_count--;
2904 mem->gobbled = FALSE;
2905
2906 VM_CHECK_MEMORYSTATUS;
2907
2908 /*
2909 * ENCRYPTED SWAP:
2910 * The page could be encrypted, but
2911 * we don't have to decrypt it here
2912 * because we don't guarantee that the
2913 * data is actually valid at this point.
2914 * The page will get decrypted in
2915 * vm_fault_wire() if needed.
2916 */
2917 }
2918 assert(!mem->gobbled);
2919 mem->wire_count++;
2920 VM_PAGE_CHECK(mem);
2921 }
2922
2923 /*
2924 * vm_page_gobble:
2925 *
2926 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2927 *
2928 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2929 */
2930 void
2931 vm_page_gobble(
2932 register vm_page_t mem)
2933 {
2934 vm_page_lockspin_queues();
2935 VM_PAGE_CHECK(mem);
2936
2937 assert(!mem->gobbled);
2938 assert( !VM_PAGE_WIRED(mem));
2939
2940 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2941 if (!mem->private && !mem->fictitious)
2942 vm_page_wire_count++;
2943 }
2944 vm_page_gobble_count++;
2945 mem->gobbled = TRUE;
2946 vm_page_unlock_queues();
2947 }
2948
2949 /*
2950 * vm_page_unwire:
2951 *
2952 * Release one wiring of this page, potentially
2953 * enabling it to be paged again.
2954 *
2955 * The page's object and the page queues must be locked.
2956 */
2957 void
2958 vm_page_unwire(
2959 vm_page_t mem,
2960 boolean_t queueit)
2961 {
2962
2963 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2964
2965 VM_PAGE_CHECK(mem);
2966 assert(VM_PAGE_WIRED(mem));
2967 assert(mem->object != VM_OBJECT_NULL);
2968 #if DEBUG
2969 vm_object_lock_assert_exclusive(mem->object);
2970 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2971 #endif
2972 if (--mem->wire_count == 0) {
2973 assert(!mem->private && !mem->fictitious);
2974 vm_page_wire_count--;
2975 assert(mem->object->wired_page_count > 0);
2976 mem->object->wired_page_count--;
2977 assert(mem->object->resident_page_count >=
2978 mem->object->wired_page_count);
2979 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2980 OSAddAtomic(+1, &vm_page_purgeable_count);
2981 assert(vm_page_purgeable_wired_count > 0);
2982 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2983 }
2984 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2985 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2986 mem->object->vo_purgeable_owner != TASK_NULL) {
2987 task_t owner;
2988
2989 owner = mem->object->vo_purgeable_owner;
2990 /* more volatile bytes */
2991 ledger_credit(owner->ledger,
2992 task_ledgers.purgeable_volatile,
2993 PAGE_SIZE);
2994 /* less not-quite-volatile bytes */
2995 ledger_debit(owner->ledger,
2996 task_ledgers.purgeable_nonvolatile,
2997 PAGE_SIZE);
2998 /* less footprint */
2999 ledger_debit(owner->ledger,
3000 task_ledgers.phys_footprint,
3001 PAGE_SIZE);
3002 }
3003 assert(mem->object != kernel_object);
3004 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3005
3006 if (queueit == TRUE) {
3007 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3008 vm_page_deactivate(mem);
3009 } else {
3010 vm_page_activate(mem);
3011 }
3012 }
3013
3014 VM_CHECK_MEMORYSTATUS;
3015
3016 }
3017 VM_PAGE_CHECK(mem);
3018 }
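
/*
 * Illustrative only: a minimal sketch (not compiled in) of the locking
 * protocol a caller of vm_page_wire()/vm_page_unwire() is expected to
 * follow, per the comments above: the page's object is locked
 * exclusively and the page queues lock is held across each call.
 * "my_wire_then_unwire" is a hypothetical helper, not part of this file.
 */
#if 0
static void
my_wire_then_unwire(vm_object_t object, vm_page_t m)
{
	vm_object_lock(object);			/* exclusive object lock */
	vm_page_lockspin_queues();
	vm_page_wire(m);			/* first wiring pulls m off the paging queues */
	vm_page_unlock_queues();

	/* ... the page cannot be paged out while it remains wired ... */

	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);		/* last unwiring re-queues m */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif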
3019
3020 /*
3021 * vm_page_deactivate:
3022 *
3023 * Returns the given page to the inactive list,
3024 * indicating that no physical maps have access
3025 * to this page. [Used by the physical mapping system.]
3026 *
3027 * The page queues must be locked.
3028 */
3029 void
3030 vm_page_deactivate(
3031 vm_page_t m)
3032 {
3033 vm_page_deactivate_internal(m, TRUE);
3034 }
3035
3036
3037 void
3038 vm_page_deactivate_internal(
3039 vm_page_t m,
3040 boolean_t clear_hw_reference)
3041 {
3042
3043 VM_PAGE_CHECK(m);
3044 assert(m->object != kernel_object);
3045 assert(m->phys_page != vm_page_guard_addr);
3046
3047 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3048 #if DEBUG
3049 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3050 #endif
3051 /*
3052 * This page is no longer very interesting. If it was
3053 * interesting (active or inactive/referenced), then we
3054 * clear the reference bit and (re)enter it in the
3055 * inactive queue. Note wired pages should not have
3056 * their reference bit cleared.
3057 */
3058 assert ( !(m->absent && !m->unusual));
3059
3060 if (m->gobbled) { /* can this happen? */
3061 assert( !VM_PAGE_WIRED(m));
3062
3063 if (!m->private && !m->fictitious)
3064 vm_page_wire_count--;
3065 vm_page_gobble_count--;
3066 m->gobbled = FALSE;
3067 }
3068 /*
3069 * if this page is currently on the pageout queue, we can't do the
3070 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3071 * and we can't remove it manually since we would need the object lock
3072 * (which is not required here) to decrement the activity_in_progress
3073 * reference which is held on the object while the page is in the pageout queue...
3074 * just let the normal laundry processing proceed
3075 */
3076 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3077 return;
3078
3079 if (!m->absent && clear_hw_reference == TRUE)
3080 pmap_clear_reference(m->phys_page);
3081
3082 m->reference = FALSE;
3083 m->no_cache = FALSE;
3084
3085 if (!m->inactive) {
3086 VM_PAGE_QUEUES_REMOVE(m);
3087
3088 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3089 m->dirty && m->object->internal &&
3090 (m->object->purgable == VM_PURGABLE_DENY ||
3091 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3092 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3093 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3094 m->throttled = TRUE;
3095 vm_page_throttled_count++;
3096 } else {
3097 if (m->object->named && m->object->ref_count == 1) {
3098 vm_page_speculate(m, FALSE);
3099 #if DEVELOPMENT || DEBUG
3100 vm_page_speculative_recreated++;
3101 #endif
3102 } else {
3103 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3104 }
3105 }
3106 }
3107 }
3108
3109 /*
3110 * vm_page_enqueue_cleaned
3111 *
3112 * Put the page on the cleaned queue, mark it cleaned, etc.
3113 * Being on the cleaned queue (and having m->clean_queue set)
3114 * does ** NOT ** guarantee that the page is clean!
3115 *
3116 * Call with the queues lock held.
3117 */
3118
3119 void vm_page_enqueue_cleaned(vm_page_t m)
3120 {
3121 assert(m->phys_page != vm_page_guard_addr);
3122 #if DEBUG
3123 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3124 #endif
3125 assert( !(m->absent && !m->unusual));
3126
3127 if (m->gobbled) {
3128 assert( !VM_PAGE_WIRED(m));
3129 if (!m->private && !m->fictitious)
3130 vm_page_wire_count--;
3131 vm_page_gobble_count--;
3132 m->gobbled = FALSE;
3133 }
3134 /*
3135 * if this page is currently on the pageout queue, we can't do the
3136 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3137 * and we can't remove it manually since we would need the object lock
3138 * (which is not required here) to decrement the activity_in_progress
3139 * reference which is held on the object while the page is in the pageout queue...
3140 * just let the normal laundry processing proceed
3141 */
3142 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3143 return;
3144
3145 VM_PAGE_QUEUES_REMOVE(m);
3146
3147 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3148 m->clean_queue = TRUE;
3149 vm_page_cleaned_count++;
3150
3151 m->inactive = TRUE;
3152 vm_page_inactive_count++;
3153 if (m->object->internal) {
3154 vm_page_pageable_internal_count++;
3155 } else {
3156 vm_page_pageable_external_count++;
3157 }
3158
3159 vm_pageout_enqueued_cleaned++;
3160 }
3161
3162 /*
3163 * vm_page_activate:
3164 *
3165 * Put the specified page on the active list (if appropriate).
3166 *
3167 * The page queues must be locked.
3168 */
3169
3170 void
3171 vm_page_activate(
3172 register vm_page_t m)
3173 {
3174 VM_PAGE_CHECK(m);
3175 #ifdef FIXME_4778297
3176 assert(m->object != kernel_object);
3177 #endif
3178 assert(m->phys_page != vm_page_guard_addr);
3179 #if DEBUG
3180 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3181 #endif
3182 assert( !(m->absent && !m->unusual));
3183
3184 if (m->gobbled) {
3185 assert( !VM_PAGE_WIRED(m));
3186 if (!m->private && !m->fictitious)
3187 vm_page_wire_count--;
3188 vm_page_gobble_count--;
3189 m->gobbled = FALSE;
3190 }
3191 /*
3192 * if this page is currently on the pageout queue, we can't do the
3193 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3194 * and we can't remove it manually since we would need the object lock
3195 * (which is not required here) to decrement the activity_in_progress
3196 * reference which is held on the object while the page is in the pageout queue...
3197 * just let the normal laundry processing proceed
3198 */
3199 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3200 return;
3201
3202 #if DEBUG
3203 if (m->active)
3204 panic("vm_page_activate: already active");
3205 #endif
3206
3207 if (m->speculative) {
3208 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3209 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3210 }
3211
3212 VM_PAGE_QUEUES_REMOVE(m);
3213
3214 if ( !VM_PAGE_WIRED(m)) {
3215
3216 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3217 m->dirty && m->object->internal &&
3218 (m->object->purgable == VM_PURGABLE_DENY ||
3219 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3220 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3221 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3222 m->throttled = TRUE;
3223 vm_page_throttled_count++;
3224 } else {
3225 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3226 m->active = TRUE;
3227 vm_page_active_count++;
3228 if (m->object->internal) {
3229 vm_page_pageable_internal_count++;
3230 } else {
3231 vm_page_pageable_external_count++;
3232 }
3233 }
3234 m->reference = TRUE;
3235 m->no_cache = FALSE;
3236 }
3237 VM_PAGE_CHECK(m);
3238 }
3239
3240
3241 /*
3242 * vm_page_speculate:
3243 *
3244 * Put the specified page on the speculative list (if appropriate).
3245 *
3246 * The page queues must be locked.
3247 */
3248 void
3249 vm_page_speculate(
3250 vm_page_t m,
3251 boolean_t new)
3252 {
3253 struct vm_speculative_age_q *aq;
3254
3255 VM_PAGE_CHECK(m);
3256 assert(m->object != kernel_object);
3257 assert(m->phys_page != vm_page_guard_addr);
3258 #if DEBUG
3259 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3260 #endif
3261 assert( !(m->absent && !m->unusual));
3262
3263 /*
3264 * if this page is currently on the pageout queue, we can't do the
3265 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3266 * and we can't remove it manually since we would need the object lock
3267 * (which is not required here) to decrement the activity_in_progress
3268 * reference which is held on the object while the page is in the pageout queue...
3269 * just let the normal laundry processing proceed
3270 */
3271 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3272 return;
3273
3274 VM_PAGE_QUEUES_REMOVE(m);
3275
3276 if ( !VM_PAGE_WIRED(m)) {
3277 mach_timespec_t ts;
3278 clock_sec_t sec;
3279 clock_nsec_t nsec;
3280
3281 clock_get_system_nanotime(&sec, &nsec);
3282 ts.tv_sec = (unsigned int) sec;
3283 ts.tv_nsec = nsec;
3284
3285 if (vm_page_speculative_count == 0) {
3286
3287 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3288 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289
3290 aq = &vm_page_queue_speculative[speculative_age_index];
3291
3292 /*
3293 * set the timer to begin a new group
3294 */
3295 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3296 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3297
3298 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3299 } else {
3300 aq = &vm_page_queue_speculative[speculative_age_index];
3301
3302 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3303
3304 speculative_age_index++;
3305
3306 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3307 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3308 if (speculative_age_index == speculative_steal_index) {
3309 speculative_steal_index = speculative_age_index + 1;
3310
3311 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3312 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3313 }
3314 aq = &vm_page_queue_speculative[speculative_age_index];
3315
3316 if (!queue_empty(&aq->age_q))
3317 vm_page_speculate_ageit(aq);
3318
3319 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3320 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3321
3322 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3323 }
3324 }
3325 enqueue_tail(&aq->age_q, &m->pageq);
3326 m->speculative = TRUE;
3327 vm_page_speculative_count++;
3328 if (m->object->internal) {
3329 vm_page_pageable_internal_count++;
3330 } else {
3331 vm_page_pageable_external_count++;
3332 }
3333
3334 if (new == TRUE) {
3335 vm_object_lock_assert_exclusive(m->object);
3336
3337 m->object->pages_created++;
3338 #if DEVELOPMENT || DEBUG
3339 vm_page_speculative_created++;
3340 #endif
3341 }
3342 }
3343 VM_PAGE_CHECK(m);
3344 }
3345
3346
3347 /*
3348 * move pages from the specified aging bin to
3349 * the speculative bin that pageout_scan claims from
3350 *
3351 * The page queues must be locked.
3352 */
3353 void
3354 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3355 {
3356 struct vm_speculative_age_q *sq;
3357 vm_page_t t;
3358
3359 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3360
3361 if (queue_empty(&sq->age_q)) {
3362 sq->age_q.next = aq->age_q.next;
3363 sq->age_q.prev = aq->age_q.prev;
3364
3365 t = (vm_page_t)sq->age_q.next;
3366 t->pageq.prev = &sq->age_q;
3367
3368 t = (vm_page_t)sq->age_q.prev;
3369 t->pageq.next = &sq->age_q;
3370 } else {
3371 t = (vm_page_t)sq->age_q.prev;
3372 t->pageq.next = aq->age_q.next;
3373
3374 t = (vm_page_t)aq->age_q.next;
3375 t->pageq.prev = sq->age_q.prev;
3376
3377 t = (vm_page_t)aq->age_q.prev;
3378 t->pageq.next = &sq->age_q;
3379
3380 sq->age_q.prev = aq->age_q.prev;
3381 }
3382 queue_init(&aq->age_q);
3383 }
3384
3385
3386 void
3387 vm_page_lru(
3388 vm_page_t m)
3389 {
3390 VM_PAGE_CHECK(m);
3391 assert(m->object != kernel_object);
3392 assert(m->phys_page != vm_page_guard_addr);
3393
3394 #if DEBUG
3395 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3396 #endif
3397 /*
3398 * if this page is currently on the pageout queue, we can't do the
3399 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3400 * and we can't remove it manually since we would need the object lock
3401 * (which is not required here) to decrement the activity_in_progress
3402 * reference which is held on the object while the page is in the pageout queue...
3403 * just let the normal laundry processing proceed
3404 */
3405 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3406 return;
3407
3408 m->no_cache = FALSE;
3409
3410 VM_PAGE_QUEUES_REMOVE(m);
3411
3412 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3413 }
3414
3415
3416 void
3417 vm_page_reactivate_all_throttled(void)
3418 {
3419 vm_page_t first_throttled, last_throttled;
3420 vm_page_t first_active;
3421 vm_page_t m;
3422 int extra_active_count;
3423 int extra_internal_count, extra_external_count;
3424
3425 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3426 return;
3427
3428 extra_active_count = 0;
3429 extra_internal_count = 0;
3430 extra_external_count = 0;
3431 vm_page_lock_queues();
3432 if (! queue_empty(&vm_page_queue_throttled)) {
3433 /*
3434 * Switch "throttled" pages to "active".
3435 */
3436 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3437 VM_PAGE_CHECK(m);
3438 assert(m->throttled);
3439 assert(!m->active);
3440 assert(!m->inactive);
3441 assert(!m->speculative);
3442 assert(!VM_PAGE_WIRED(m));
3443
3444 extra_active_count++;
3445 if (m->object->internal) {
3446 extra_internal_count++;
3447 } else {
3448 extra_external_count++;
3449 }
3450
3451 m->throttled = FALSE;
3452 m->active = TRUE;
3453 VM_PAGE_CHECK(m);
3454 }
3455
3456 /*
3457 * Transfer the entire throttled queue to the regular LRU page queues.
3458 * We insert it at the head of the active queue, so that these pages
3459 * get re-evaluated by the LRU algorithm first, since they've been
3460 * completely out of it until now.
3461 */
3462 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3463 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3464 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3465 if (queue_empty(&vm_page_queue_active)) {
3466 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3467 } else {
3468 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3469 }
3470 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3471 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3472 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3473
3474 #if DEBUG
3475 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3476 #endif
3477 queue_init(&vm_page_queue_throttled);
3478 /*
3479 * Adjust the global page counts.
3480 */
3481 vm_page_active_count += extra_active_count;
3482 vm_page_pageable_internal_count += extra_internal_count;
3483 vm_page_pageable_external_count += extra_external_count;
3484 vm_page_throttled_count = 0;
3485 }
3486 assert(vm_page_throttled_count == 0);
3487 assert(queue_empty(&vm_page_queue_throttled));
3488 vm_page_unlock_queues();
3489 }
3490
3491
3492 /*
3493 * move pages from the indicated local queue to the global active queue
3494 * it's OK to fail if we're below the hard limit and force == FALSE
3495 * the nolocks == TRUE case is to allow this function to be run on
3496 * the hibernate path
3497 */
3498
3499 void
3500 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3501 {
3502 struct vpl *lq;
3503 vm_page_t first_local, last_local;
3504 vm_page_t first_active;
3505 vm_page_t m;
3506 uint32_t count = 0;
3507
3508 if (vm_page_local_q == NULL)
3509 return;
3510
3511 lq = &vm_page_local_q[lid].vpl_un.vpl;
3512
3513 if (nolocks == FALSE) {
3514 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3515 if ( !vm_page_trylockspin_queues())
3516 return;
3517 } else
3518 vm_page_lockspin_queues();
3519
3520 VPL_LOCK(&lq->vpl_lock);
3521 }
3522 if (lq->vpl_count) {
3523 /*
3524 * Switch "local" pages to "active".
3525 */
3526 assert(!queue_empty(&lq->vpl_queue));
3527
3528 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3529 VM_PAGE_CHECK(m);
3530 assert(m->local);
3531 assert(!m->active);
3532 assert(!m->inactive);
3533 assert(!m->speculative);
3534 assert(!VM_PAGE_WIRED(m));
3535 assert(!m->throttled);
3536 assert(!m->fictitious);
3537
3538 if (m->local_id != lid)
3539 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3540
3541 m->local_id = 0;
3542 m->local = FALSE;
3543 m->active = TRUE;
3544 VM_PAGE_CHECK(m);
3545
3546 count++;
3547 }
3548 if (count != lq->vpl_count)
3549 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3550
3551 /*
3552 * Transfer the entire local queue to the regular LRU page queues.
3553 */
3554 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3555 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3556 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3557
3558 if (queue_empty(&vm_page_queue_active)) {
3559 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3560 } else {
3561 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3562 }
3563 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3564 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3565 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3566
3567 queue_init(&lq->vpl_queue);
3568 /*
3569 * Adjust the global page counts.
3570 */
3571 vm_page_active_count += lq->vpl_count;
3572 vm_page_pageable_internal_count += lq->vpl_internal_count;
3573 vm_page_pageable_external_count += lq->vpl_external_count;
3574 lq->vpl_count = 0;
3575 lq->vpl_internal_count = 0;
3576 lq->vpl_external_count = 0;
3577 }
3578 assert(queue_empty(&lq->vpl_queue));
3579
3580 if (nolocks == FALSE) {
3581 VPL_UNLOCK(&lq->vpl_lock);
3582 vm_page_unlock_queues();
3583 }
3584 }
3585
3586 /*
3587 * vm_page_part_zero_fill:
3588 *
3589 * Zero-fill a part of the page.
3590 */
3591 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3592 void
3593 vm_page_part_zero_fill(
3594 vm_page_t m,
3595 vm_offset_t m_pa,
3596 vm_size_t len)
3597 {
3598
3599 #if 0
3600 /*
3601 * we don't hold the page queue lock
3602 * so this check isn't safe to make
3603 */
3604 VM_PAGE_CHECK(m);
3605 #endif
3606
3607 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3608 pmap_zero_part_page(m->phys_page, m_pa, len);
3609 #else
3610 vm_page_t tmp;
3611 while (1) {
3612 tmp = vm_page_grab();
3613 if (tmp == VM_PAGE_NULL) {
3614 vm_page_wait(THREAD_UNINT);
3615 continue;
3616 }
3617 break;
3618 }
3619 vm_page_zero_fill(tmp);
3620 if(m_pa != 0) {
3621 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3622 }
3623 if((m_pa + len) < PAGE_SIZE) {
3624 vm_page_part_copy(m, m_pa + len, tmp,
3625 m_pa + len, PAGE_SIZE - (m_pa + len));
3626 }
3627 vm_page_copy(tmp,m);
3628 VM_PAGE_FREE(tmp);
3629 #endif
3630
3631 }
3632
3633 /*
3634 * vm_page_zero_fill:
3635 *
3636 * Zero-fill the specified page.
3637 */
3638 void
3639 vm_page_zero_fill(
3640 vm_page_t m)
3641 {
3642 XPR(XPR_VM_PAGE,
3643 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3644 m->object, m->offset, m, 0,0);
3645 #if 0
3646 /*
3647 * we don't hold the page queue lock
3648 * so this check isn't safe to make
3649 */
3650 VM_PAGE_CHECK(m);
3651 #endif
3652
3653 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3654 pmap_zero_page(m->phys_page);
3655 }
3656
3657 /*
3658 * vm_page_part_copy:
3659 *
3660 * copy part of one page to another
3661 */
3662
3663 void
3664 vm_page_part_copy(
3665 vm_page_t src_m,
3666 vm_offset_t src_pa,
3667 vm_page_t dst_m,
3668 vm_offset_t dst_pa,
3669 vm_size_t len)
3670 {
3671 #if 0
3672 /*
3673 * we don't hold the page queue lock
3674 * so this check isn't safe to make
3675 */
3676 VM_PAGE_CHECK(src_m);
3677 VM_PAGE_CHECK(dst_m);
3678 #endif
3679 pmap_copy_part_page(src_m->phys_page, src_pa,
3680 dst_m->phys_page, dst_pa, len);
3681 }
3682
3683 /*
3684 * vm_page_copy:
3685 *
3686 * Copy one page to another
3687 *
3688 * ENCRYPTED SWAP:
3689 * The source page should not be encrypted. The caller should
3690 * make sure the page is decrypted first, if necessary.
3691 */
3692
3693 int vm_page_copy_cs_validations = 0;
3694 int vm_page_copy_cs_tainted = 0;
3695
3696 void
3697 vm_page_copy(
3698 vm_page_t src_m,
3699 vm_page_t dest_m)
3700 {
3701 XPR(XPR_VM_PAGE,
3702 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3703 src_m->object, src_m->offset,
3704 dest_m->object, dest_m->offset,
3705 0);
3706 #if 0
3707 /*
3708 * we don't hold the page queue lock
3709 * so this check isn't safe to make
3710 */
3711 VM_PAGE_CHECK(src_m);
3712 VM_PAGE_CHECK(dest_m);
3713 #endif
3714 vm_object_lock_assert_held(src_m->object);
3715
3716 /*
3717 * ENCRYPTED SWAP:
3718 * The source page should not be encrypted at this point.
3719 * The destination page will therefore not contain encrypted
3720 * data after the copy.
3721 */
3722 if (src_m->encrypted) {
3723 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3724 }
3725 dest_m->encrypted = FALSE;
3726
3727 if (src_m->object != VM_OBJECT_NULL &&
3728 src_m->object->code_signed) {
3729 /*
3730 * We're copying a page from a code-signed object.
3731 * Whoever ends up mapping the copy page might care about
3732 * the original page's integrity, so let's validate the
3733 * source page now.
3734 */
3735 vm_page_copy_cs_validations++;
3736 vm_page_validate_cs(src_m);
3737 }
3738
3739 if (vm_page_is_slideable(src_m)) {
3740 boolean_t was_busy = src_m->busy;
3741 src_m->busy = TRUE;
3742 (void) vm_page_slide(src_m, 0);
3743 assert(src_m->busy);
3744 if (!was_busy) {
3745 PAGE_WAKEUP_DONE(src_m);
3746 }
3747 }
3748
3749 /*
3750 * Propagate the cs_tainted bit to the copy page. Do not propagate
3751 * the cs_validated bit.
3752 */
3753 dest_m->cs_tainted = src_m->cs_tainted;
3754 if (dest_m->cs_tainted) {
3755 vm_page_copy_cs_tainted++;
3756 }
3757 dest_m->slid = src_m->slid;
3758 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3759 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3760 }
3761
3762 #if MACH_ASSERT
3763 static void
3764 _vm_page_print(
3765 vm_page_t p)
3766 {
3767 printf("vm_page %p: \n", p);
3768 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3769 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3770 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3771 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3772 printf(" wire_count=%u\n", p->wire_count);
3773
3774 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3775 (p->local ? "" : "!"),
3776 (p->inactive ? "" : "!"),
3777 (p->active ? "" : "!"),
3778 (p->pageout_queue ? "" : "!"),
3779 (p->speculative ? "" : "!"),
3780 (p->laundry ? "" : "!"));
3781 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3782 (p->free ? "" : "!"),
3783 (p->reference ? "" : "!"),
3784 (p->gobbled ? "" : "!"),
3785 (p->private ? "" : "!"),
3786 (p->throttled ? "" : "!"));
3787 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3788 (p->busy ? "" : "!"),
3789 (p->wanted ? "" : "!"),
3790 (p->tabled ? "" : "!"),
3791 (p->fictitious ? "" : "!"),
3792 (p->pmapped ? "" : "!"),
3793 (p->wpmapped ? "" : "!"));
3794 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3795 (p->pageout ? "" : "!"),
3796 (p->absent ? "" : "!"),
3797 (p->error ? "" : "!"),
3798 (p->dirty ? "" : "!"),
3799 (p->cleaning ? "" : "!"),
3800 (p->precious ? "" : "!"),
3801 (p->clustered ? "" : "!"));
3802 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3803 (p->overwriting ? "" : "!"),
3804 (p->restart ? "" : "!"),
3805 (p->unusual ? "" : "!"),
3806 (p->encrypted ? "" : "!"),
3807 (p->encrypted_cleaning ? "" : "!"));
3808 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3809 (p->cs_validated ? "" : "!"),
3810 (p->cs_tainted ? "" : "!"),
3811 (p->no_cache ? "" : "!"));
3812
3813 printf("phys_page=0x%x\n", p->phys_page);
3814 }
3815
3816 /*
3817 * Check that the list of pages is ordered by
3818 * ascending physical address and has no holes.
3819 */
3820 static int
3821 vm_page_verify_contiguous(
3822 vm_page_t pages,
3823 unsigned int npages)
3824 {
3825 register vm_page_t m;
3826 unsigned int page_count;
3827 vm_offset_t prev_addr;
3828
3829 prev_addr = pages->phys_page;
3830 page_count = 1;
3831 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3832 if (m->phys_page != prev_addr + 1) {
3833 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3834 m, (long)prev_addr, m->phys_page);
3835 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3836 panic("vm_page_verify_contiguous: not contiguous!");
3837 }
3838 prev_addr = m->phys_page;
3839 ++page_count;
3840 }
3841 if (page_count != npages) {
3842 printf("pages %p actual count 0x%x but requested 0x%x\n",
3843 pages, page_count, npages);
3844 panic("vm_page_verify_contiguous: count error");
3845 }
3846 return 1;
3847 }
3848
3849
3850 /*
3851 * Check the free lists for proper length etc.
3852 */
3853 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3854 static unsigned int
3855 vm_page_verify_free_list(
3856 queue_head_t *vm_page_queue,
3857 unsigned int color,
3858 vm_page_t look_for_page,
3859 boolean_t expect_page)
3860 {
3861 unsigned int npages;
3862 vm_page_t m;
3863 vm_page_t prev_m;
3864 boolean_t found_page;
3865
3866 if (! vm_page_verify_this_free_list_enabled)
3867 return 0;
3868
3869 found_page = FALSE;
3870 npages = 0;
3871 prev_m = (vm_page_t) vm_page_queue;
3872 queue_iterate(vm_page_queue,
3873 m,
3874 vm_page_t,
3875 pageq) {
3876
3877 if (m == look_for_page) {
3878 found_page = TRUE;
3879 }
3880 if ((vm_page_t) m->pageq.prev != prev_m)
3881 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3882 color, npages, m, m->pageq.prev, prev_m);
3883 if ( ! m->busy )
3884 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3885 color, npages, m);
3886 if (color != (unsigned int) -1) {
3887 if ((m->phys_page & vm_color_mask) != color)
3888 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3889 color, npages, m, m->phys_page & vm_color_mask, color);
3890 if ( ! m->free )
3891 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3892 color, npages, m);
3893 }
3894 ++npages;
3895 prev_m = m;
3896 }
3897 if (look_for_page != VM_PAGE_NULL) {
3898 unsigned int other_color;
3899
3900 if (expect_page && !found_page) {
3901 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3902 color, npages, look_for_page, look_for_page->phys_page);
3903 _vm_page_print(look_for_page);
3904 for (other_color = 0;
3905 other_color < vm_colors;
3906 other_color++) {
3907 if (other_color == color)
3908 continue;
3909 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3910 other_color, look_for_page, FALSE);
3911 }
3912 if (color == (unsigned int) -1) {
3913 vm_page_verify_free_list(&vm_lopage_queue_free,
3914 (unsigned int) -1, look_for_page, FALSE);
3915 }
3916 panic("vm_page_verify_free_list(color=%u)\n", color);
3917 }
3918 if (!expect_page && found_page) {
3919 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3920 color, npages, look_for_page, look_for_page->phys_page);
3921 }
3922 }
3923 return npages;
3924 }
3925
3926 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
3927 static void
3928 vm_page_verify_free_lists( void )
3929 {
3930 unsigned int color, npages, nlopages;
3931 boolean_t toggle = TRUE;
3932
3933 if (! vm_page_verify_all_free_lists_enabled)
3934 return;
3935
3936 npages = 0;
3937
3938 lck_mtx_lock(&vm_page_queue_free_lock);
3939
3940 if (vm_page_verify_this_free_list_enabled == TRUE) {
3941 /*
3942 * This variable has been set globally for extra checking of
3943 * each free list Q. Since we didn't set it, we don't own it
3944 * and we shouldn't toggle it.
3945 */
3946 toggle = FALSE;
3947 }
3948
3949 if (toggle == TRUE) {
3950 vm_page_verify_this_free_list_enabled = TRUE;
3951 }
3952
3953 for( color = 0; color < vm_colors; color++ ) {
3954 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3955 color, VM_PAGE_NULL, FALSE);
3956 }
3957 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3958 (unsigned int) -1,
3959 VM_PAGE_NULL, FALSE);
3960 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3961 panic("vm_page_verify_free_lists: "
3962 "npages %u free_count %d nlopages %u lo_free_count %u",
3963 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3964
3965 if (toggle == TRUE) {
3966 vm_page_verify_this_free_list_enabled = FALSE;
3967 }
3968
3969 lck_mtx_unlock(&vm_page_queue_free_lock);
3970 }
3971
3972 void
3973 vm_page_queues_assert(
3974 vm_page_t mem,
3975 int val)
3976 {
3977 #if DEBUG
3978 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3979 #endif
3980 if (mem->free + mem->active + mem->inactive + mem->speculative +
3981 mem->throttled + mem->pageout_queue > (val)) {
3982 _vm_page_print(mem);
3983 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3984 }
3985 if (VM_PAGE_WIRED(mem)) {
3986 assert(!mem->active);
3987 assert(!mem->inactive);
3988 assert(!mem->speculative);
3989 assert(!mem->throttled);
3990 assert(!mem->pageout_queue);
3991 }
3992 }
3993 #endif /* MACH_ASSERT */
3994
3995
3996 /*
3997 * CONTIGUOUS PAGE ALLOCATION
3998 *
3999 * Find a region large enough to contain at least n pages
4000 * of contiguous physical memory.
4001 *
4002 * This is done by traversing the vm_page_t array in a linear fashion...
4003 * we assume that the vm_page_t array has the available physical pages in an
4004 * ordered, ascending list... this is currently true of all our implementations
4005 * and must remain so... there can be 'holes' in the array... we also can
4006 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4007 * which used to happen via 'vm_page_convert'... that function was no longer
4008 * being called and was removed...
4009 *
4010 * The basic flow consists of stabilizing some of the interesting state of
4011 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4012 * sweep at the beginning of the array looking for pages that meet our criteria
4013 * for a 'stealable' page... currently we are pretty conservative... if the page
4014 * meets these criteria and is physically contiguous to the previous page in the 'run'
4015 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4016 * and start to develop a new run... if at this point we've already considered
4017 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4018 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4019 * to other threads trying to acquire free pages (or move pages from q to q),
4020 * and then continue from the spot we left off... we only make 1 pass through the
4021 * array. Once we have a 'run' that is long enough, we'll go into the loop
4022 * which steals the pages from the queues they're currently on... pages on the free
4023 * queue can be stolen directly... pages that are on any of the other queues
4024 * must be removed from the object they are tabled on... this requires taking the
4025 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4026 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4027 * dump the pages we've currently stolen back to the free list, and pick up our
4028 * scan from the point where we aborted the 'current' run.
4029 *
4030 *
4031 * Requirements:
4032 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4033 *
4034 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4035 *
4036 * Algorithm:
4037 */
4038
4039 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4040
4041
4042 #define RESET_STATE_OF_RUN() \
4043 MACRO_BEGIN \
4044 prevcontaddr = -2; \
4045 start_pnum = -1; \
4046 free_considered = 0; \
4047 substitute_needed = 0; \
4048 npages = 0; \
4049 MACRO_END
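
/*
 * Illustrative outline (not part of the original file): a condensed sketch
 * of the sweep described above, compressing the main loop of
 * vm_page_find_contiguous() below into its essential steps.  Variable and
 * macro names are the ones used by that function.
 */
#if 0
	for (page_idx = last_idx; npages < contig_pages && page_idx < vm_pages_count; page_idx++) {
		m = &vm_pages[page_idx];

		if (0 /* page is wired, busy, or otherwise not 'stealable' */) {
			RESET_STATE_OF_RUN();		/* abandon the current run */
		} else if (m->phys_page == prevcontaddr + 1) {
			npages++;			/* physically adjacent... extend the run */
			prevcontaddr = m->phys_page;
		} else {
			npages = 1;			/* this page starts a new run */
			start_idx = page_idx;
			start_pnum = prevcontaddr = m->phys_page;
		}
		if (considered++ > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
			/* drop both locks, mutex_pause(), re-take the locks, reset the run */
		}
	}
#endif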
4050
4051 /*
4052 * Can we steal in-use (i.e. not free) pages when searching for
4053 * physically-contiguous pages ?
4054 */
4055 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4056
4057 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4058 #if DEBUG
4059 int vm_page_find_contig_debug = 0;
4060 #endif
4061
4062 static vm_page_t
4063 vm_page_find_contiguous(
4064 unsigned int contig_pages,
4065 ppnum_t max_pnum,
4066 ppnum_t pnum_mask,
4067 boolean_t wire,
4068 int flags)
4069 {
4070 vm_page_t m = NULL;
4071 ppnum_t prevcontaddr;
4072 ppnum_t start_pnum;
4073 unsigned int npages, considered, scanned;
4074 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4075 unsigned int idx_last_contig_page_found = 0;
4076 int free_considered, free_available;
4077 int substitute_needed;
4078 boolean_t wrapped;
4079 #if DEBUG
4080 clock_sec_t tv_start_sec, tv_end_sec;
4081 clock_usec_t tv_start_usec, tv_end_usec;
4082 #endif
4083 #if MACH_ASSERT
4084 int yielded = 0;
4085 int dumped_run = 0;
4086 int stolen_pages = 0;
4087 int compressed_pages = 0;
4088 #endif
4089
4090 if (contig_pages == 0)
4091 return VM_PAGE_NULL;
4092
4093 #if MACH_ASSERT
4094 vm_page_verify_free_lists();
4095 #endif
4096 #if DEBUG
4097 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4098 #endif
4099 PAGE_REPLACEMENT_ALLOWED(TRUE);
4100
4101 vm_page_lock_queues();
4102 lck_mtx_lock(&vm_page_queue_free_lock);
4103
4104 RESET_STATE_OF_RUN();
4105
4106 scanned = 0;
4107 considered = 0;
4108 free_available = vm_page_free_count - vm_page_free_reserved;
4109
4110 wrapped = FALSE;
4111
4112 if(flags & KMA_LOMEM)
4113 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4114 else
4115 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4116
4117 orig_last_idx = idx_last_contig_page_found;
4118 last_idx = orig_last_idx;
4119
4120 for (page_idx = last_idx, start_idx = last_idx;
4121 npages < contig_pages && page_idx < vm_pages_count;
4122 page_idx++) {
4123 retry:
4124 if (wrapped &&
4125 npages == 0 &&
4126 page_idx >= orig_last_idx) {
4127 /*
4128 * We're back where we started and we haven't
4129 * found any suitable contiguous range. Let's
4130 * give up.
4131 */
4132 break;
4133 }
4134 scanned++;
4135 m = &vm_pages[page_idx];
4136
4137 assert(!m->fictitious);
4138 assert(!m->private);
4139
4140 if (max_pnum && m->phys_page > max_pnum) {
4141 /* no more low pages... */
4142 break;
4143 }
4144 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
4145 /*
4146 * not aligned
4147 */
4148 RESET_STATE_OF_RUN();
4149
4150 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4151 m->encrypted_cleaning ||
4152 m->pageout_queue || m->laundry || m->wanted ||
4153 m->cleaning || m->overwriting || m->pageout) {
4154 /*
4155 * page is in a transient state
4156 * or a state we don't want to deal
4157 * with, so don't consider it which
4158 * means starting a new run
4159 */
4160 RESET_STATE_OF_RUN();
4161
4162 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4163 /*
4164 * page needs to be on one of our queues
4165 * or it needs to belong to the compressor pool
4166 * in order for it to be stable behind the
4167 * locks we hold at this point...
4168 * if not, don't consider it which
4169 * means starting a new run
4170 */
4171 RESET_STATE_OF_RUN();
4172
4173 } else if (!m->free && (!m->tabled || m->busy)) {
4174 /*
4175 * pages on the free list are always 'busy'
4176 * so we couldn't test for 'busy' in the check
4177 * for the transient states... pages that are
4178 * 'free' are never 'tabled', so we also couldn't
4179 * test for 'tabled'. So we check here to make
4180 * sure that a non-free page is not busy and is
4181 * tabled on an object...
4182 * if not, don't consider it which
4183 * means starting a new run
4184 */
4185 RESET_STATE_OF_RUN();
4186
4187 } else {
4188 if (m->phys_page != prevcontaddr + 1) {
4189 if ((m->phys_page & pnum_mask) != 0) {
4190 RESET_STATE_OF_RUN();
4191 goto did_consider;
4192 } else {
4193 npages = 1;
4194 start_idx = page_idx;
4195 start_pnum = m->phys_page;
4196 }
4197 } else {
4198 npages++;
4199 }
4200 prevcontaddr = m->phys_page;
4201
4202 VM_PAGE_CHECK(m);
4203 if (m->free) {
4204 free_considered++;
4205 } else {
4206 /*
4207 * This page is not free.
4208 * If we can't steal used pages,
4209 * we have to give up this run
4210 * and keep looking.
4211 * Otherwise, we might need to
4212 * move the contents of this page
4213 * into a substitute page.
4214 */
4215 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4216 if (m->pmapped || m->dirty || m->precious) {
4217 substitute_needed++;
4218 }
4219 #else
4220 RESET_STATE_OF_RUN();
4221 #endif
4222 }
4223
4224 if ((free_considered + substitute_needed) > free_available) {
4225 /*
4226 * if we let this run continue
4227 * we will end up dropping the vm_page_free_count
4228 * below the reserve limit... we need to abort
4229 * this run, but we can at least re-consider this
4230 * page... thus the jump back to 'retry'
4231 */
4232 RESET_STATE_OF_RUN();
4233
4234 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4235 considered++;
4236 goto retry;
4237 }
4238 /*
4239 * free_available == 0
4240 * so can't consider any free pages... if
4241 * we went to retry in this case, we'd
4242 * get stuck looking at the same page
4243 * w/o making any forward progress
4244 * we also want to take this path if we've already
4245 * reached our limit that controls the lock latency
4246 */
4247 }
4248 }
4249 did_consider:
4250 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4251
4252 PAGE_REPLACEMENT_ALLOWED(FALSE);
4253
4254 lck_mtx_unlock(&vm_page_queue_free_lock);
4255 vm_page_unlock_queues();
4256
4257 mutex_pause(0);
4258
4259 PAGE_REPLACEMENT_ALLOWED(TRUE);
4260
4261 vm_page_lock_queues();
4262 lck_mtx_lock(&vm_page_queue_free_lock);
4263
4264 RESET_STATE_OF_RUN();
4265 /*
4266 * reset our free page limit since we
4267 * dropped the lock protecting the vm_page_free_queue
4268 */
4269 free_available = vm_page_free_count - vm_page_free_reserved;
4270 considered = 0;
4271 #if MACH_ASSERT
4272 yielded++;
4273 #endif
4274 goto retry;
4275 }
4276 considered++;
4277 }
4278 m = VM_PAGE_NULL;
4279
4280 if (npages != contig_pages) {
4281 if (!wrapped) {
4282 /*
4283 * We didn't find a contiguous range but we didn't
4284 * start from the very first page.
4285 * Start again from the very first page.
4286 */
4287 RESET_STATE_OF_RUN();
4288 if( flags & KMA_LOMEM)
4289 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4290 else
4291 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4292 last_idx = 0;
4293 page_idx = last_idx;
4294 wrapped = TRUE;
4295 goto retry;
4296 }
4297 lck_mtx_unlock(&vm_page_queue_free_lock);
4298 } else {
4299 vm_page_t m1;
4300 vm_page_t m2;
4301 unsigned int cur_idx;
4302 unsigned int tmp_start_idx;
4303 vm_object_t locked_object = VM_OBJECT_NULL;
4304 boolean_t abort_run = FALSE;
4305
4306 assert(page_idx - start_idx == contig_pages);
4307
4308 tmp_start_idx = start_idx;
4309
4310 /*
4311 * first pass through to pull the free pages
4312 * off of the free queue so that in case we
4313 * need substitute pages, we won't grab any
4314 * of the free pages in the run... we'll clear
4315 * the 'free' bit in the 2nd pass, and even in
4316 * an abort_run case, we'll collect all of the
4317 * free pages in this run and return them to the free list
4318 */
4319 while (start_idx < page_idx) {
4320
4321 m1 = &vm_pages[start_idx++];
4322
4323 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4324 assert(m1->free);
4325 #endif
4326
4327 if (m1->free) {
4328 unsigned int color;
4329
4330 color = m1->phys_page & vm_color_mask;
4331 #if MACH_ASSERT
4332 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4333 #endif
4334 queue_remove(&vm_page_queue_free[color],
4335 m1,
4336 vm_page_t,
4337 pageq);
4338 m1->pageq.next = NULL;
4339 m1->pageq.prev = NULL;
4340 #if MACH_ASSERT
4341 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4342 #endif
4343 /*
4344 * Clear the "free" bit so that this page
4345 * does not get considered for another
4346 * concurrent physically-contiguous allocation.
4347 */
4348 m1->free = FALSE;
4349 assert(m1->busy);
4350
4351 vm_page_free_count--;
4352 }
4353 }
4354 if( flags & KMA_LOMEM)
4355 vm_page_lomem_find_contiguous_last_idx = page_idx;
4356 else
4357 vm_page_find_contiguous_last_idx = page_idx;
4358
4359 /*
4360 * we can drop the free queue lock at this point since
4361 * we've pulled any 'free' candidates off of the list
4362 * we need it dropped so that we can do a vm_page_grab
4363 * when substituting for pmapped/dirty pages
4364 */
4365 lck_mtx_unlock(&vm_page_queue_free_lock);
4366
4367 start_idx = tmp_start_idx;
4368 cur_idx = page_idx - 1;
4369
4370 while (start_idx++ < page_idx) {
4371 /*
4372 * must go through the list from back to front
4373 * so that the page list is created in the
4374 * correct order - low -> high phys addresses
4375 */
4376 m1 = &vm_pages[cur_idx--];
4377
4378 assert(!m1->free);
4379
4380 if (m1->object == VM_OBJECT_NULL) {
4381 /*
4382 * page has already been removed from
4383 * the free list in the 1st pass
4384 */
4385 assert(m1->offset == (vm_object_offset_t) -1);
4386 assert(m1->busy);
4387 assert(!m1->wanted);
4388 assert(!m1->laundry);
4389 } else {
4390 vm_object_t object;
4391 int refmod;
4392 boolean_t disconnected, reusable;
4393
4394 if (abort_run == TRUE)
4395 continue;
4396
4397 object = m1->object;
4398
4399 if (object != locked_object) {
4400 if (locked_object) {
4401 vm_object_unlock(locked_object);
4402 locked_object = VM_OBJECT_NULL;
4403 }
4404 if (vm_object_lock_try(object))
4405 locked_object = object;
4406 }
4407 if (locked_object == VM_OBJECT_NULL ||
4408 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4409 m1->encrypted_cleaning ||
4410 m1->pageout_queue || m1->laundry || m1->wanted ||
4411 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4412
4413 if (locked_object) {
4414 vm_object_unlock(locked_object);
4415 locked_object = VM_OBJECT_NULL;
4416 }
4417 tmp_start_idx = cur_idx;
4418 abort_run = TRUE;
4419 continue;
4420 }
4421
4422 disconnected = FALSE;
4423 reusable = FALSE;
4424
4425 if ((m1->reusable ||
4426 m1->object->all_reusable) &&
4427 m1->inactive &&
4428 !m1->dirty &&
4429 !m1->reference) {
4430 /* reusable page... */
4431 refmod = pmap_disconnect(m1->phys_page);
4432 disconnected = TRUE;
4433 if (refmod == 0) {
4434 /*
4435 * ... not reused: can steal
4436 * without relocating contents.
4437 */
4438 reusable = TRUE;
4439 }
4440 }
4441
4442 if ((m1->pmapped &&
4443 ! reusable) ||
4444 m1->dirty ||
4445 m1->precious) {
4446 vm_object_offset_t offset;
4447
4448 m2 = vm_page_grab();
4449
4450 if (m2 == VM_PAGE_NULL) {
4451 if (locked_object) {
4452 vm_object_unlock(locked_object);
4453 locked_object = VM_OBJECT_NULL;
4454 }
4455 tmp_start_idx = cur_idx;
4456 abort_run = TRUE;
4457 continue;
4458 }
4459 if (! disconnected) {
4460 if (m1->pmapped)
4461 refmod = pmap_disconnect(m1->phys_page);
4462 else
4463 refmod = 0;
4464 }
4465
4466 /* copy the page's contents */
4467 pmap_copy_page(m1->phys_page, m2->phys_page);
4468 /* copy the page's state */
4469 assert(!VM_PAGE_WIRED(m1));
4470 assert(!m1->free);
4471 assert(!m1->pageout_queue);
4472 assert(!m1->laundry);
4473 m2->reference = m1->reference;
4474 assert(!m1->gobbled);
4475 assert(!m1->private);
4476 m2->no_cache = m1->no_cache;
4477 m2->xpmapped = 0;
4478 assert(!m1->busy);
4479 assert(!m1->wanted);
4480 assert(!m1->fictitious);
4481 m2->pmapped = m1->pmapped; /* should flush cache ? */
4482 m2->wpmapped = m1->wpmapped;
4483 assert(!m1->pageout);
4484 m2->absent = m1->absent;
4485 m2->error = m1->error;
4486 m2->dirty = m1->dirty;
4487 assert(!m1->cleaning);
4488 m2->precious = m1->precious;
4489 m2->clustered = m1->clustered;
4490 assert(!m1->overwriting);
4491 m2->restart = m1->restart;
4492 m2->unusual = m1->unusual;
4493 m2->encrypted = m1->encrypted;
4494 assert(!m1->encrypted_cleaning);
4495 m2->cs_validated = m1->cs_validated;
4496 m2->cs_tainted = m1->cs_tainted;
4497
4498 /*
4499 * If m1 had really been reusable,
4500 * we would have just stolen it, so
4501 * let's not propagate its "reusable"
4502 * bit and assert that m2 is not
4503 * marked as "reusable".
4504 */
4505 // m2->reusable = m1->reusable;
4506 assert(!m2->reusable);
4507
4508 assert(!m1->lopage);
4509 m2->slid = m1->slid;
4510 m2->compressor = m1->compressor;
4511
4512 /*
4513 * page may need to be flushed if
4514 * it is marshalled into a UPL
4515 * that is going to be used by a device
4516 * that doesn't support coherency
4517 */
4518 m2->written_by_kernel = TRUE;
4519
4520 /*
4521 * make sure we clear the ref/mod state
4522 * from the pmap layer... else we risk
4523 * inheriting state from the last time
4524 * this page was used...
4525 */
4526 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4527
4528 if (refmod & VM_MEM_REFERENCED)
4529 m2->reference = TRUE;
4530 if (refmod & VM_MEM_MODIFIED) {
4531 SET_PAGE_DIRTY(m2, TRUE);
4532 }
4533 offset = m1->offset;
4534
4535 /*
4536 * completely cleans up the state
4537 * of the page so that it is ready
4538 * to be put onto the free list, or
4539 * for this purpose it looks like it
4540 * just came off of the free list
4541 */
4542 vm_page_free_prepare(m1);
4543
4544 /*
4545 * now put the substitute page
4546 * on the object
4547 */
4548 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4549
4550 if (m2->compressor) {
4551 m2->pmapped = TRUE;
4552 m2->wpmapped = TRUE;
4553
4554 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4555 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4556 #if MACH_ASSERT
4557 compressed_pages++;
4558 #endif
4559 } else {
4560 if (m2->reference)
4561 vm_page_activate(m2);
4562 else
4563 vm_page_deactivate(m2);
4564 }
4565 PAGE_WAKEUP_DONE(m2);
4566
4567 } else {
4568 assert(!m1->compressor);
4569
4570 /*
4571 * completely cleans up the state
4572 * of the page so that it is ready
4573 * to be put onto the free list, or
4574 * for this purpose it looks like it
4575 * just came off of the free list
4576 */
4577 vm_page_free_prepare(m1);
4578 }
4579 #if MACH_ASSERT
4580 stolen_pages++;
4581 #endif
4582 }
4583 m1->pageq.next = (queue_entry_t) m;
4584 m1->pageq.prev = NULL;
4585 m = m1;
4586 }
4587 if (locked_object) {
4588 vm_object_unlock(locked_object);
4589 locked_object = VM_OBJECT_NULL;
4590 }
4591
4592 if (abort_run == TRUE) {
4593 if (m != VM_PAGE_NULL) {
4594 vm_page_free_list(m, FALSE);
4595 }
4596 #if MACH_ASSERT
4597 dumped_run++;
4598 #endif
4599 /*
4600 * want the index of the last
4601 * page in this run that was
4602 * successfully 'stolen', so back
4603 * it up 1 for the auto-decrement on use
4604 * and 1 more to bump back over this page
4605 */
4606 page_idx = tmp_start_idx + 2;
4607 if (page_idx >= vm_pages_count) {
4608 if (wrapped)
4609 goto done_scanning;
4610 page_idx = last_idx = 0;
4611 wrapped = TRUE;
4612 }
4613 abort_run = FALSE;
4614
4615 /*
4616 * We didn't find a contiguous range but we didn't
4617 * start from the very first page.
4618 * Start again from the very first page.
4619 */
4620 RESET_STATE_OF_RUN();
4621
4622 if( flags & KMA_LOMEM)
4623 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4624 else
4625 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4626
4627 last_idx = page_idx;
4628
4629 lck_mtx_lock(&vm_page_queue_free_lock);
4630 /*
4631 * reset our free page limit since we
4632 * dropped the lock protecting the vm_page_free_queue
4633 */
4634 free_available = vm_page_free_count - vm_page_free_reserved;
4635 goto retry;
4636 }
4637
4638 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4639
4640 if (wire == TRUE)
4641 m1->wire_count++;
4642 else
4643 m1->gobbled = TRUE;
4644 }
4645 if (wire == FALSE)
4646 vm_page_gobble_count += npages;
4647
4648 /*
4649 * gobbled pages are also counted as wired pages
4650 */
4651 vm_page_wire_count += npages;
4652
4653 assert(vm_page_verify_contiguous(m, npages));
4654 }
4655 done_scanning:
4656 PAGE_REPLACEMENT_ALLOWED(FALSE);
4657
4658 vm_page_unlock_queues();
4659
4660 #if DEBUG
4661 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4662
4663 tv_end_sec -= tv_start_sec;
4664 if (tv_end_usec < tv_start_usec) {
4665 tv_end_sec--;
4666 tv_end_usec += 1000000;
4667 }
4668 tv_end_usec -= tv_start_usec;
4669 if (tv_end_usec >= 1000000) {
4670 tv_end_sec++;
4671 tv_end_usec -= 1000000;
4672 }
4673 if (vm_page_find_contig_debug) {
4674 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4675 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4676 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4677 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4678 }
4679
4680 #endif
4681 #if MACH_ASSERT
4682 vm_page_verify_free_lists();
4683 #endif
4684 return m;
4685 }
4686
4687 /*
4688 * Allocate a list of contiguous, wired pages.
4689 */
4690 kern_return_t
4691 cpm_allocate(
4692 vm_size_t size,
4693 vm_page_t *list,
4694 ppnum_t max_pnum,
4695 ppnum_t pnum_mask,
4696 boolean_t wire,
4697 int flags)
4698 {
4699 vm_page_t pages;
4700 unsigned int npages;
4701
4702 if (size % PAGE_SIZE != 0)
4703 return KERN_INVALID_ARGUMENT;
4704
4705 npages = (unsigned int) (size / PAGE_SIZE);
4706 if (npages != size / PAGE_SIZE) {
4707 /* 32-bit overflow */
4708 return KERN_INVALID_ARGUMENT;
4709 }
4710
4711 /*
4712 * Obtain a pointer to a subset of the free
4713 * list large enough to satisfy the request;
4714 * the region will be physically contiguous.
4715 */
4716 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4717
4718 if (pages == VM_PAGE_NULL)
4719 return KERN_NO_SPACE;
4720 /*
4721 * determine need for wakeups
4722 */
4723 if ((vm_page_free_count < vm_page_free_min) ||
4724 ((vm_page_free_count < vm_page_free_target) &&
4725 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4726 thread_wakeup((event_t) &vm_page_free_wanted);
4727
4728 VM_CHECK_MEMORYSTATUS;
4729
4730 /*
4731 * The CPM pages should now be available and
4732 * ordered by ascending physical address.
4733 */
4734 assert(vm_page_verify_contiguous(pages, npages));
4735
4736 *list = pages;
4737 return KERN_SUCCESS;
4738 }
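
/*
 * Illustrative only (not part of the original file): a hypothetical caller
 * of cpm_allocate() asking for 16 wired, physically contiguous pages below
 * a chosen maximum physical page number.  The argument values are made up;
 * the signature is the one defined above.
 */
#if 0
	vm_page_t	page_list;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* size: must be a multiple of PAGE_SIZE */
			  &page_list,		/* out: pages ordered by ascending phys addr */
			  (ppnum_t) 0xFFFFF,	/* max_pnum: highest acceptable page number */
			  0,			/* pnum_mask: no alignment constraint */
			  TRUE,			/* wire the pages rather than gobble them */
			  0);			/* flags: KMA_LOMEM not requested */
	if (kr != KERN_SUCCESS) {
		/* no suitable contiguous run could be found or stolen */
	}
#endif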
4739
4740
4741 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4742
4743 /*
4744 * when working on a 'run' of pages, it is necessary to hold
4745 * the vm_page_queue_lock (a hot global lock) for certain operations
4746 * on the page... however, the majority of the work can be done
4747 * while merely holding the object lock... in fact there are certain
4748 * collections of pages that don't require any work brokered by the
4749 * vm_page_queue_lock... to mitigate the time spent behind the global
4750 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4751 * while doing all of the work that doesn't require the vm_page_queue_lock...
4752 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4753 * necessary work for each page... we will grab the busy bit on the page
4754 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4755 * if it can't immediately take the vm_page_queue_lock in order to compete
4756 * for the locks in the same order that vm_pageout_scan takes them.
4757 * the operation names are modeled after the names of the routines that
4758 * need to be called in order to make the changes very obvious in the
4759 * original loop
4760 */
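
/*
 * Illustrative sketch (not part of the original file) of the 2 pass pattern
 * described above.  The dw_mask bits and struct fields are the ones consumed
 * by vm_page_do_delayed_work() below; the real callers live in vm_pageout.c
 * and use batching macros that are not shown here.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;

	/* pass 1: holding only the object lock... record the queue work for this page */
	dwp->dw_m = m;
	dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
	dwp++;
	dw_count++;

	/* pass 2: take the vm_page_queue_lock once for the whole batch */
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
#endif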
4761
4762 void
4763 vm_page_do_delayed_work(
4764 vm_object_t object,
4765 struct vm_page_delayed_work *dwp,
4766 int dw_count)
4767 {
4768 int j;
4769 vm_page_t m;
4770 vm_page_t local_free_q = VM_PAGE_NULL;
4771
4772 /*
4773 * pageout_scan takes the vm_page_lock_queues first
4774 * then tries for the object lock... to avoid what
4775 * is effectively a lock inversion, we'll go to the
4776 * trouble of taking them in that same order... otherwise
4777 * if this object contains the majority of the pages resident
4778 * in the UBC (or a small set of large objects actively being
4779 * worked on contain the majority of the pages), we could
4780 * cause the pageout_scan thread to 'starve' in its attempt
4781 * to find pages to move to the free queue, since it has to
4782 * successfully acquire the object lock of any candidate page
4783 * before it can steal/clean it.
4784 */
4785 if (!vm_page_trylockspin_queues()) {
4786 vm_object_unlock(object);
4787
4788 vm_page_lockspin_queues();
4789
4790 for (j = 0; ; j++) {
4791 if (!vm_object_lock_avoid(object) &&
4792 _vm_object_lock_try(object))
4793 break;
4794 vm_page_unlock_queues();
4795 mutex_pause(j);
4796 vm_page_lockspin_queues();
4797 }
4798 }
4799 for (j = 0; j < dw_count; j++, dwp++) {
4800
4801 m = dwp->dw_m;
4802
4803 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4804 vm_pageout_throttle_up(m);
4805 #if CONFIG_PHANTOM_CACHE
4806 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4807 vm_phantom_cache_update(m);
4808 #endif
4809 if (dwp->dw_mask & DW_vm_page_wire)
4810 vm_page_wire(m);
4811 else if (dwp->dw_mask & DW_vm_page_unwire) {
4812 boolean_t queueit;
4813
4814 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4815
4816 vm_page_unwire(m, queueit);
4817 }
4818 if (dwp->dw_mask & DW_vm_page_free) {
4819 vm_page_free_prepare_queues(m);
4820
4821 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4822 /*
4823 * Add this page to our list of reclaimed pages,
4824 * to be freed later.
4825 */
4826 m->pageq.next = (queue_entry_t) local_free_q;
4827 local_free_q = m;
4828 } else {
4829 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4830 vm_page_deactivate_internal(m, FALSE);
4831 else if (dwp->dw_mask & DW_vm_page_activate) {
4832 if (m->active == FALSE) {
4833 vm_page_activate(m);
4834 }
4835 }
4836 else if (dwp->dw_mask & DW_vm_page_speculate)
4837 vm_page_speculate(m, TRUE);
4838 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4839 /*
4840 * if we didn't hold the object lock and did this,
4841 * we might disconnect the page, then someone might
4842 * soft fault it back in, then we would put it on the
4843 * cleaned queue, and so we would have a referenced (maybe even dirty)
4844 * page on that queue, which we don't want
4845 */
4846 int refmod_state = pmap_disconnect(m->phys_page);
4847
4848 if ((refmod_state & VM_MEM_REFERENCED)) {
4849 /*
4850 * this page has been touched since it got cleaned; let's activate it
4851 * if it hasn't already been
4852 */
4853 vm_pageout_enqueued_cleaned++;
4854 vm_pageout_cleaned_reactivated++;
4855 vm_pageout_cleaned_commit_reactivated++;
4856
4857 if (m->active == FALSE)
4858 vm_page_activate(m);
4859 } else {
4860 m->reference = FALSE;
4861 vm_page_enqueue_cleaned(m);
4862 }
4863 }
4864 else if (dwp->dw_mask & DW_vm_page_lru)
4865 vm_page_lru(m);
4866 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4867 if ( !m->pageout_queue)
4868 VM_PAGE_QUEUES_REMOVE(m);
4869 }
4870 if (dwp->dw_mask & DW_set_reference)
4871 m->reference = TRUE;
4872 else if (dwp->dw_mask & DW_clear_reference)
4873 m->reference = FALSE;
4874
4875 if (dwp->dw_mask & DW_move_page) {
4876 if ( !m->pageout_queue) {
4877 VM_PAGE_QUEUES_REMOVE(m);
4878
4879 assert(m->object != kernel_object);
4880
4881 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4882 }
4883 }
4884 if (dwp->dw_mask & DW_clear_busy)
4885 m->busy = FALSE;
4886
4887 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4888 PAGE_WAKEUP(m);
4889 }
4890 }
4891 vm_page_unlock_queues();
4892
4893 if (local_free_q)
4894 vm_page_free_list(local_free_q, TRUE);
4895
4896 VM_CHECK_MEMORYSTATUS;
4897
4898 }
4899
4900 kern_return_t
4901 vm_page_alloc_list(
4902 int page_count,
4903 int flags,
4904 vm_page_t *list)
4905 {
4906 vm_page_t lo_page_list = VM_PAGE_NULL;
4907 vm_page_t mem;
4908 int i;
4909
4910 if ( !(flags & KMA_LOMEM))
4911 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4912
4913 for (i = 0; i < page_count; i++) {
4914
4915 mem = vm_page_grablo();
4916
4917 if (mem == VM_PAGE_NULL) {
4918 if (lo_page_list)
4919 vm_page_free_list(lo_page_list, FALSE);
4920
4921 *list = VM_PAGE_NULL;
4922
4923 return (KERN_RESOURCE_SHORTAGE);
4924 }
4925 mem->pageq.next = (queue_entry_t) lo_page_list;
4926 lo_page_list = mem;
4927 }
4928 *list = lo_page_list;
4929
4930 return (KERN_SUCCESS);
4931 }
4932
4933 void
4934 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4935 {
4936 page->offset = offset;
4937 }
4938
4939 vm_page_t
4940 vm_page_get_next(vm_page_t page)
4941 {
4942 return ((vm_page_t) page->pageq.next);
4943 }
4944
4945 vm_object_offset_t
4946 vm_page_get_offset(vm_page_t page)
4947 {
4948 return (page->offset);
4949 }
4950
4951 ppnum_t
4952 vm_page_get_phys_page(vm_page_t page)
4953 {
4954 return (page->phys_page);
4955 }
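
/*
 * Illustrative only (not part of the original file): walking a list
 * returned by vm_page_alloc_list() with the accessors defined above.
 */
#if 0
	vm_page_t	p;
	ppnum_t		pnum;

	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
		pnum = vm_page_get_phys_page(p);
		/* ... hand pnum to whatever device code needed the pages ... */
	}
#endif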
4956
4957
4958 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4959
4960 #if HIBERNATION
4961
4962 static vm_page_t hibernate_gobble_queue;
4963
4964 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4965
4966 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4967 static int hibernate_flush_dirty_pages(int);
4968 static int hibernate_flush_queue(queue_head_t *, int);
4969
4970 void hibernate_flush_wait(void);
4971 void hibernate_mark_in_progress(void);
4972 void hibernate_clear_in_progress(void);
4973
4974 void hibernate_free_range(int, int);
4975 void hibernate_hash_insert_page(vm_page_t);
4976 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4977 void hibernate_rebuild_vm_structs(void);
4978 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4979 ppnum_t hibernate_lookup_paddr(unsigned int);
4980
4981 struct hibernate_statistics {
4982 int hibernate_considered;
4983 int hibernate_reentered_on_q;
4984 int hibernate_found_dirty;
4985 int hibernate_skipped_cleaning;
4986 int hibernate_skipped_transient;
4987 int hibernate_skipped_precious;
4988 int hibernate_skipped_external;
4989 int hibernate_queue_nolock;
4990 int hibernate_queue_paused;
4991 int hibernate_throttled;
4992 int hibernate_throttle_timeout;
4993 int hibernate_drained;
4994 int hibernate_drain_timeout;
4995 int cd_lock_failed;
4996 int cd_found_precious;
4997 int cd_found_wired;
4998 int cd_found_busy;
4999 int cd_found_unusual;
5000 int cd_found_cleaning;
5001 int cd_found_laundry;
5002 int cd_found_dirty;
5003 int cd_found_xpmapped;
5004 int cd_skipped_xpmapped;
5005 int cd_local_free;
5006 int cd_total_free;
5007 int cd_vm_page_wire_count;
5008 int cd_vm_struct_pages_unneeded;
5009 int cd_pages;
5010 int cd_discarded;
5011 int cd_count_wire;
5012 } hibernate_stats;
5013
5014
5015 /*
5016 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5017 * so that we don't overrun the estimated image size, which would
5018 * result in a hibernation failure.
5019 */
5020 #define HIBERNATE_XPMAPPED_LIMIT 40000
5021
5022
5023 static int
5024 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5025 {
5026 wait_result_t wait_result;
5027
5028 vm_page_lock_queues();
5029
5030 while ( !queue_empty(&q->pgo_pending) ) {
5031
5032 q->pgo_draining = TRUE;
5033
5034 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5035
5036 vm_page_unlock_queues();
5037
5038 wait_result = thread_block(THREAD_CONTINUE_NULL);
5039
5040 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5041 hibernate_stats.hibernate_drain_timeout++;
5042
5043 if (q == &vm_pageout_queue_external)
5044 return (0);
5045
5046 return (1);
5047 }
5048 vm_page_lock_queues();
5049
5050 hibernate_stats.hibernate_drained++;
5051 }
5052 vm_page_unlock_queues();
5053
5054 return (0);
5055 }
5056
5057
5058 boolean_t hibernate_skip_external = FALSE;
5059
5060 static int
5061 hibernate_flush_queue(queue_head_t *q, int qcount)
5062 {
5063 vm_page_t m;
5064 vm_object_t l_object = NULL;
5065 vm_object_t m_object = NULL;
5066 int refmod_state = 0;
5067 int try_failed_count = 0;
5068 int retval = 0;
5069 int current_run = 0;
5070 struct vm_pageout_queue *iq;
5071 struct vm_pageout_queue *eq;
5072 struct vm_pageout_queue *tq;
5073
5074
5075 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5076
5077 iq = &vm_pageout_queue_internal;
5078 eq = &vm_pageout_queue_external;
5079
5080 vm_page_lock_queues();
5081
5082 while (qcount && !queue_empty(q)) {
5083
5084 if (current_run++ == 1000) {
5085 if (hibernate_should_abort()) {
5086 retval = 1;
5087 break;
5088 }
5089 current_run = 0;
5090 }
5091
5092 m = (vm_page_t) queue_first(q);
5093 m_object = m->object;
5094
5095 /*
5096 * check to see if we currently are working
5097 * with the same object... if so, we've
5098 * already got the lock
5099 */
5100 if (m_object != l_object) {
5101 /*
5102 * the object associated with candidate page is
5103 * different from the one we were just working
5104 * with... dump the lock if we still own it
5105 */
5106 if (l_object != NULL) {
5107 vm_object_unlock(l_object);
5108 l_object = NULL;
5109 }
5110 /*
5111 * Try to lock object; since we've already got the
5112 * page queues lock, we can only 'try' for this one.
5113 * if the 'try' fails, we need to do a mutex_pause
5114 * to allow the owner of the object lock a chance to
5115 * run...
5116 */
5117 if ( !vm_object_lock_try_scan(m_object)) {
5118
5119 if (try_failed_count > 20) {
5120 hibernate_stats.hibernate_queue_nolock++;
5121
5122 goto reenter_pg_on_q;
5123 }
5124
5125 vm_page_unlock_queues();
5126 mutex_pause(try_failed_count++);
5127 vm_page_lock_queues();
5128
5129 hibernate_stats.hibernate_queue_paused++;
5130 continue;
5131 } else {
5132 l_object = m_object;
5133 }
5134 }
5135 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5136 /*
5137 * page is not to be cleaned
5138 * put it back on the head of its queue
5139 */
5140 if (m->cleaning)
5141 hibernate_stats.hibernate_skipped_cleaning++;
5142 else
5143 hibernate_stats.hibernate_skipped_transient++;
5144
5145 goto reenter_pg_on_q;
5146 }
5147 if (m_object->copy == VM_OBJECT_NULL) {
5148 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5149 /*
5150 * let the normal hibernate image path
5151 * deal with these
5152 */
5153 goto reenter_pg_on_q;
5154 }
5155 }
5156 if ( !m->dirty && m->pmapped) {
5157 refmod_state = pmap_get_refmod(m->phys_page);
5158
5159 if ((refmod_state & VM_MEM_MODIFIED)) {
5160 SET_PAGE_DIRTY(m, FALSE);
5161 }
5162 } else
5163 refmod_state = 0;
5164
5165 if ( !m->dirty) {
5166 /*
5167 * page is not to be cleaned
5168 * put it back on the head of its queue
5169 */
5170 if (m->precious)
5171 hibernate_stats.hibernate_skipped_precious++;
5172
5173 goto reenter_pg_on_q;
5174 }
5175
5176 if (hibernate_skip_external == TRUE && !m_object->internal) {
5177
5178 hibernate_stats.hibernate_skipped_external++;
5179
5180 goto reenter_pg_on_q;
5181 }
5182 tq = NULL;
5183
5184 if (m_object->internal) {
5185 if (VM_PAGE_Q_THROTTLED(iq))
5186 tq = iq;
5187 } else if (VM_PAGE_Q_THROTTLED(eq))
5188 tq = eq;
5189
5190 if (tq != NULL) {
5191 wait_result_t wait_result;
5192 int wait_count = 5;
5193
5194 if (l_object != NULL) {
5195 vm_object_unlock(l_object);
5196 l_object = NULL;
5197 }
5198
5199 while (retval == 0) {
5200
5201 tq->pgo_throttled = TRUE;
5202
5203 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5204
5205 vm_page_unlock_queues();
5206
5207 wait_result = thread_block(THREAD_CONTINUE_NULL);
5208
5209 vm_page_lock_queues();
5210
5211 if (wait_result != THREAD_TIMED_OUT)
5212 break;
5213 if (!VM_PAGE_Q_THROTTLED(tq))
5214 break;
5215
5216 if (hibernate_should_abort())
5217 retval = 1;
5218
5219 if (--wait_count == 0) {
5220
5221 hibernate_stats.hibernate_throttle_timeout++;
5222
5223 if (tq == eq) {
5224 hibernate_skip_external = TRUE;
5225 break;
5226 }
5227 retval = 1;
5228 }
5229 }
5230 if (retval)
5231 break;
5232
5233 hibernate_stats.hibernate_throttled++;
5234
5235 continue;
5236 }
5237 /*
5238 * we've already factored out pages in the laundry which
5239 * means this page can't be on the pageout queue so it's
5240 * safe to do the VM_PAGE_QUEUES_REMOVE
5241 */
5242 assert(!m->pageout_queue);
5243
5244 VM_PAGE_QUEUES_REMOVE(m);
5245
5246 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5247 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5248
5249 vm_pageout_cluster(m, FALSE);
5250
5251 hibernate_stats.hibernate_found_dirty++;
5252
5253 goto next_pg;
5254
5255 reenter_pg_on_q:
5256 queue_remove(q, m, vm_page_t, pageq);
5257 queue_enter(q, m, vm_page_t, pageq);
5258
5259 hibernate_stats.hibernate_reentered_on_q++;
5260 next_pg:
5261 hibernate_stats.hibernate_considered++;
5262
5263 qcount--;
5264 try_failed_count = 0;
5265 }
5266 if (l_object != NULL) {
5267 vm_object_unlock(l_object);
5268 l_object = NULL;
5269 }
5270
5271 vm_page_unlock_queues();
5272
5273 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5274
5275 return (retval);
5276 }
5277
5278
5279 static int
5280 hibernate_flush_dirty_pages(int pass)
5281 {
5282 struct vm_speculative_age_q *aq;
5283 uint32_t i;
5284
5285 if (vm_page_local_q) {
5286 for (i = 0; i < vm_page_local_q_count; i++)
5287 vm_page_reactivate_local(i, TRUE, FALSE);
5288 }
5289
5290 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5291 int qcount;
5292 vm_page_t m;
5293
5294 aq = &vm_page_queue_speculative[i];
5295
5296 if (queue_empty(&aq->age_q))
5297 continue;
5298 qcount = 0;
5299
5300 vm_page_lockspin_queues();
5301
5302 queue_iterate(&aq->age_q,
5303 m,
5304 vm_page_t,
5305 pageq)
5306 {
5307 qcount++;
5308 }
5309 vm_page_unlock_queues();
5310
5311 if (qcount) {
5312 if (hibernate_flush_queue(&aq->age_q, qcount))
5313 return (1);
5314 }
5315 }
5316 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5317 return (1);
5318 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5319 return (1);
5320 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5321 return (1);
5322 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5323 return (1);
5324
5325 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5326 vm_compressor_record_warmup_start();
5327
5328 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5329 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5330 vm_compressor_record_warmup_end();
5331 return (1);
5332 }
5333 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5334 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5335 vm_compressor_record_warmup_end();
5336 return (1);
5337 }
5338 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5339 vm_compressor_record_warmup_end();
5340
5341 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5342 return (1);
5343
5344 return (0);
5345 }
5346
5347
5348 void
5349 hibernate_reset_stats()
5350 {
5351 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5352 }
5353
5354
5355 int
5356 hibernate_flush_memory()
5357 {
5358 int retval;
5359
5360 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5361
5362 hibernate_cleaning_in_progress = TRUE;
5363 hibernate_skip_external = FALSE;
5364
5365 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5366
5367 if (COMPRESSED_PAGER_IS_ACTIVE) {
5368
5369 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5370
5371 vm_compressor_flush();
5372
5373 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5374 }
5375 if (consider_buffer_cache_collect != NULL) {
5376 unsigned int orig_wire_count;
5377
5378 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5379 orig_wire_count = vm_page_wire_count;
5380
5381 (void)(*consider_buffer_cache_collect)(1);
5382 consider_zone_gc(TRUE);
5383
5384 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5385
5386 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5387 }
5388 }
5389 hibernate_cleaning_in_progress = FALSE;
5390
5391 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5392
5393 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5394 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5395
5396
5397 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5398 hibernate_stats.hibernate_considered,
5399 hibernate_stats.hibernate_reentered_on_q,
5400 hibernate_stats.hibernate_found_dirty);
5401 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5402 hibernate_stats.hibernate_skipped_cleaning,
5403 hibernate_stats.hibernate_skipped_transient,
5404 hibernate_stats.hibernate_skipped_precious,
5405 hibernate_stats.hibernate_skipped_external,
5406 hibernate_stats.hibernate_queue_nolock);
5407 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5408 hibernate_stats.hibernate_queue_paused,
5409 hibernate_stats.hibernate_throttled,
5410 hibernate_stats.hibernate_throttle_timeout,
5411 hibernate_stats.hibernate_drained,
5412 hibernate_stats.hibernate_drain_timeout);
5413
5414 return (retval);
5415 }
5416
5417
5418 static void
5419 hibernate_page_list_zero(hibernate_page_list_t *list)
5420 {
5421 uint32_t bank;
5422 hibernate_bitmap_t * bitmap;
5423
5424 bitmap = &list->bank_bitmap[0];
5425 for (bank = 0; bank < list->bank_count; bank++)
5426 {
5427 uint32_t last_bit;
5428
5429 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5430 // set out-of-bound bits at end of bitmap.
5431 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5432 if (last_bit)
5433 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5434
5435 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5436 }
5437 }
5438
5439 void
5440 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5441 {
5442 uint32_t i;
5443 vm_page_t m;
5444 uint64_t start, end, timeout, nsec;
5445 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5446 clock_get_uptime(&start);
5447
5448 for (i = 0; i < gobble_count; i++)
5449 {
5450 while (VM_PAGE_NULL == (m = vm_page_grab()))
5451 {
5452 clock_get_uptime(&end);
5453 if (end >= timeout)
5454 break;
5455 VM_PAGE_WAIT();
5456 }
5457 if (!m)
5458 break;
5459 m->busy = FALSE;
5460 vm_page_gobble(m);
5461
5462 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5463 hibernate_gobble_queue = m;
5464 }
5465
5466 clock_get_uptime(&end);
5467 absolutetime_to_nanoseconds(end - start, &nsec);
5468 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5469 }
5470
5471 void
5472 hibernate_free_gobble_pages(void)
5473 {
5474 vm_page_t m, next;
5475 uint32_t count = 0;
5476
5477 m = (vm_page_t) hibernate_gobble_queue;
5478 while(m)
5479 {
5480 next = (vm_page_t) m->pageq.next;
5481 vm_page_free(m);
5482 count++;
5483 m = next;
5484 }
5485 hibernate_gobble_queue = VM_PAGE_NULL;
5486
5487 if (count)
5488 HIBLOG("Freed %d pages\n", count);
5489 }
5490
5491 static boolean_t
5492 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5493 {
5494 vm_object_t object = NULL;
5495 int refmod_state;
5496 boolean_t discard = FALSE;
5497
5498 do
5499 {
5500 if (m->private)
5501 panic("hibernate_consider_discard: private");
5502
5503 if (!vm_object_lock_try(m->object)) {
5504 if (!preflight) hibernate_stats.cd_lock_failed++;
5505 break;
5506 }
5507 object = m->object;
5508
5509 if (VM_PAGE_WIRED(m)) {
5510 if (!preflight) hibernate_stats.cd_found_wired++;
5511 break;
5512 }
5513 if (m->precious) {
5514 if (!preflight) hibernate_stats.cd_found_precious++;
5515 break;
5516 }
5517 if (m->busy || !object->alive) {
5518 /*
5519 * Somebody is playing with this page.
5520 */
5521 if (!preflight) hibernate_stats.cd_found_busy++;
5522 break;
5523 }
5524 if (m->absent || m->unusual || m->error) {
5525 /*
5526 * If it's unusual in any way, ignore it
5527 */
5528 if (!preflight) hibernate_stats.cd_found_unusual++;
5529 break;
5530 }
5531 if (m->cleaning) {
5532 if (!preflight) hibernate_stats.cd_found_cleaning++;
5533 break;
5534 }
5535 if (m->laundry) {
5536 if (!preflight) hibernate_stats.cd_found_laundry++;
5537 break;
5538 }
5539 if (!m->dirty)
5540 {
5541 refmod_state = pmap_get_refmod(m->phys_page);
5542
5543 if (refmod_state & VM_MEM_REFERENCED)
5544 m->reference = TRUE;
5545 if (refmod_state & VM_MEM_MODIFIED) {
5546 SET_PAGE_DIRTY(m, FALSE);
5547 }
5548 }
5549
5550 /*
5551 * If it's clean or purgeable we can discard the page on wakeup.
5552 */
5553 discard = (!m->dirty)
5554 || (VM_PURGABLE_VOLATILE == object->purgable)
5555 || (VM_PURGABLE_EMPTY == object->purgable);
5556
5557
5558 if (discard == FALSE) {
5559 if (!preflight)
5560 hibernate_stats.cd_found_dirty++;
5561 } else if (m->xpmapped && m->reference && !object->internal) {
5562 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5563 if (!preflight)
5564 hibernate_stats.cd_found_xpmapped++;
5565 discard = FALSE;
5566 } else {
5567 if (!preflight)
5568 hibernate_stats.cd_skipped_xpmapped++;
5569 }
5570 }
5571 }
5572 while (FALSE);
5573
5574 if (object)
5575 vm_object_unlock(object);
5576
5577 return (discard);
5578 }
5579
5580
5581 static void
5582 hibernate_discard_page(vm_page_t m)
5583 {
5584 if (m->absent || m->unusual || m->error)
5585 /*
5586 * If it's unusual in anyway, ignore
5587 */
5588 return;
5589
5590 #if MACH_ASSERT || DEBUG
5591 vm_object_t object = m->object;
5592 if (!vm_object_lock_try(m->object))
5593 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5594 #else
5595 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5596 makes sure these locks are uncontended before sleep */
5597 #endif /* MACH_ASSERT || DEBUG */
5598
5599 if (m->pmapped == TRUE)
5600 {
5601 __unused int refmod_state = pmap_disconnect(m->phys_page);
5602 }
5603
5604 if (m->laundry)
5605 panic("hibernate_discard_page(%p) laundry", m);
5606 if (m->private)
5607 panic("hibernate_discard_page(%p) private", m);
5608 if (m->fictitious)
5609 panic("hibernate_discard_page(%p) fictitious", m);
5610
5611 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5612 {
5613 /* object should be on a queue */
5614 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5615 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5616 assert(old_queue);
5617 if (m->object->purgeable_when_ripe) {
5618 vm_purgeable_token_delete_first(old_queue);
5619 }
5620 m->object->purgable = VM_PURGABLE_EMPTY;
5621
5622 /*
5623 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5624 * accounted in the "volatile" ledger, so no change here.
5625 * We have to update vm_page_purgeable_count, though, since we're
5626 * effectively purging this object.
5627 */
5628 unsigned int delta;
5629 assert(m->object->resident_page_count >= m->object->wired_page_count);
5630 delta = (m->object->resident_page_count - m->object->wired_page_count);
5631 assert(vm_page_purgeable_count >= delta);
5632 assert(delta > 0);
5633 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5634 }
5635
5636 vm_page_free(m);
5637
5638 #if MACH_ASSERT || DEBUG
5639 vm_object_unlock(object);
5640 #endif /* MACH_ASSERT || DEBUG */
5641 }
5642
5643 /*
5644 Grab locks for hibernate_page_list_setall()
5645 */
5646 void
5647 hibernate_vm_lock_queues(void)
5648 {
5649 vm_object_lock(compressor_object);
5650 vm_page_lock_queues();
5651 lck_mtx_lock(&vm_page_queue_free_lock);
5652
5653 if (vm_page_local_q) {
5654 uint32_t i;
5655 for (i = 0; i < vm_page_local_q_count; i++) {
5656 struct vpl *lq;
5657 lq = &vm_page_local_q[i].vpl_un.vpl;
5658 VPL_LOCK(&lq->vpl_lock);
5659 }
5660 }
5661 }
5662
5663 void
5664 hibernate_vm_unlock_queues(void)
5665 {
5666 if (vm_page_local_q) {
5667 uint32_t i;
5668 for (i = 0; i < vm_page_local_q_count; i++) {
5669 struct vpl *lq;
5670 lq = &vm_page_local_q[i].vpl_un.vpl;
5671 VPL_UNLOCK(&lq->vpl_lock);
5672 }
5673 }
5674 lck_mtx_unlock(&vm_page_queue_free_lock);
5675 vm_page_unlock_queues();
5676 vm_object_unlock(compressor_object);
5677 }
5678
5679 /*
5680 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5681 pages known to the VM not to need saving are subtracted.
5682 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5683 */
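
/*
 * Illustrative only (not part of the original file): the sense of the
 * bitmaps, as used by the loops in hibernate_page_list_setall() below.
 * Setting a bit marks the page as NOT needing to be saved in the image.
 */
#if 0
	/* free page: exclude it from both the pageable and the wired bitmaps */
	hibernate_page_bitset(page_list, TRUE, m->phys_page);
	hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
#endif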
5684
5685 void
5686 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5687 hibernate_page_list_t * page_list_wired,
5688 hibernate_page_list_t * page_list_pal,
5689 boolean_t preflight,
5690 boolean_t will_discard,
5691 uint32_t * pagesOut)
5692 {
5693 uint64_t start, end, nsec;
5694 vm_page_t m;
5695 vm_page_t next;
5696 uint32_t pages = page_list->page_count;
5697 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5698 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5699 uint32_t count_wire = pages;
5700 uint32_t count_discard_active = 0;
5701 uint32_t count_discard_inactive = 0;
5702 uint32_t count_discard_cleaned = 0;
5703 uint32_t count_discard_purgeable = 0;
5704 uint32_t count_discard_speculative = 0;
5705 uint32_t count_discard_vm_struct_pages = 0;
5706 uint32_t i;
5707 uint32_t bank;
5708 hibernate_bitmap_t * bitmap;
5709 hibernate_bitmap_t * bitmap_wired;
5710 boolean_t discard_all;
5711 boolean_t discard;
5712
5713 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5714
5715 if (preflight) {
5716 page_list = NULL;
5717 page_list_wired = NULL;
5718 page_list_pal = NULL;
5719 discard_all = FALSE;
5720 } else {
5721 discard_all = will_discard;
5722 }
5723
5724 #if MACH_ASSERT || DEBUG
5725 if (!preflight)
5726 {
5727 vm_page_lock_queues();
5728 if (vm_page_local_q) {
5729 for (i = 0; i < vm_page_local_q_count; i++) {
5730 struct vpl *lq;
5731 lq = &vm_page_local_q[i].vpl_un.vpl;
5732 VPL_LOCK(&lq->vpl_lock);
5733 }
5734 }
5735 }
5736 #endif /* MACH_ASSERT || DEBUG */
5737
5738
5739 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5740
5741 clock_get_uptime(&start);
5742
5743 if (!preflight) {
5744 hibernate_page_list_zero(page_list);
5745 hibernate_page_list_zero(page_list_wired);
5746 hibernate_page_list_zero(page_list_pal);
5747
5748 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5749 hibernate_stats.cd_pages = pages;
5750 }
5751
5752 if (vm_page_local_q) {
5753 for (i = 0; i < vm_page_local_q_count; i++)
5754 vm_page_reactivate_local(i, TRUE, !preflight);
5755 }
5756
5757 if (preflight) {
5758 vm_object_lock(compressor_object);
5759 vm_page_lock_queues();
5760 lck_mtx_lock(&vm_page_queue_free_lock);
5761 }
5762
5763 m = (vm_page_t) hibernate_gobble_queue;
5764 while (m)
5765 {
5766 pages--;
5767 count_wire--;
5768 if (!preflight) {
5769 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5770 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5771 }
5772 m = (vm_page_t) m->pageq.next;
5773 }
5774
5775 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5776 {
5777 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5778 {
5779 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5780 {
5781 pages--;
5782 count_wire--;
5783 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5784 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5785
5786 hibernate_stats.cd_local_free++;
5787 hibernate_stats.cd_total_free++;
5788 }
5789 }
5790 }
5791
5792 for( i = 0; i < vm_colors; i++ )
5793 {
5794 queue_iterate(&vm_page_queue_free[i],
5795 m,
5796 vm_page_t,
5797 pageq)
5798 {
5799 pages--;
5800 count_wire--;
5801 if (!preflight) {
5802 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5803 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5804
5805 hibernate_stats.cd_total_free++;
5806 }
5807 }
5808 }
5809
5810 queue_iterate(&vm_lopage_queue_free,
5811 m,
5812 vm_page_t,
5813 pageq)
5814 {
5815 pages--;
5816 count_wire--;
5817 if (!preflight) {
5818 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5819 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5820
5821 hibernate_stats.cd_total_free++;
5822 }
5823 }
5824
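/*
 * Each pageable-queue walk below follows the same pattern: if the hibernate
 * mode allows it and hibernate_consider_discard() agrees, the page is marked
 * "no save" in page_list and counted as discardable; otherwise it is counted
 * as a page to preserve.  Either way it is removed from the wired count and
 * flagged in page_list_wired (it will not be saved as a wired page), and
 * when discard_all is set the discard happens immediately.
 */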
5825 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5826 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5827 {
5828 next = (vm_page_t) m->pageq.next;
5829 discard = FALSE;
5830 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5831 && hibernate_consider_discard(m, preflight))
5832 {
5833 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5834 count_discard_inactive++;
5835 discard = discard_all;
5836 }
5837 else
5838 count_throttled++;
5839 count_wire--;
5840 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5841
5842 if (discard) hibernate_discard_page(m);
5843 m = next;
5844 }
5845
5846 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5847 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5848 {
5849 next = (vm_page_t) m->pageq.next;
5850 discard = FALSE;
5851 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5852 && hibernate_consider_discard(m, preflight))
5853 {
5854 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5855 if (m->dirty)
5856 count_discard_purgeable++;
5857 else
5858 count_discard_inactive++;
5859 discard = discard_all;
5860 }
5861 else
5862 count_anonymous++;
5863 count_wire--;
5864 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5865 if (discard) hibernate_discard_page(m);
5866 m = next;
5867 }
5868
5869 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5870 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5871 {
5872 next = (vm_page_t) m->pageq.next;
5873 discard = FALSE;
5874 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5875 && hibernate_consider_discard(m, preflight))
5876 {
5877 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5878 if (m->dirty)
5879 count_discard_purgeable++;
5880 else
5881 count_discard_cleaned++;
5882 discard = discard_all;
5883 }
5884 else
5885 count_cleaned++;
5886 count_wire--;
5887 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5888 if (discard) hibernate_discard_page(m);
5889 m = next;
5890 }
5891
5892 m = (vm_page_t) queue_first(&vm_page_queue_active);
5893 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5894 {
5895 next = (vm_page_t) m->pageq.next;
5896 discard = FALSE;
5897 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5898 && hibernate_consider_discard(m, preflight))
5899 {
5900 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5901 if (m->dirty)
5902 count_discard_purgeable++;
5903 else
5904 count_discard_active++;
5905 discard = discard_all;
5906 }
5907 else
5908 count_active++;
5909 count_wire--;
5910 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5911 if (discard) hibernate_discard_page(m);
5912 m = next;
5913 }
5914
5915 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5916 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5917 {
5918 next = (vm_page_t) m->pageq.next;
5919 discard = FALSE;
5920 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5921 && hibernate_consider_discard(m, preflight))
5922 {
5923 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5924 if (m->dirty)
5925 count_discard_purgeable++;
5926 else
5927 count_discard_inactive++;
5928 discard = discard_all;
5929 }
5930 else
5931 count_inactive++;
5932 count_wire--;
5933 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5934 if (discard) hibernate_discard_page(m);
5935 m = next;
5936 }
5937
5938 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5939 {
5940 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5941 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5942 {
5943 next = (vm_page_t) m->pageq.next;
5944 discard = FALSE;
5945 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5946 && hibernate_consider_discard(m, preflight))
5947 {
5948 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5949 count_discard_speculative++;
5950 discard = discard_all;
5951 }
5952 else
5953 count_speculative++;
5954 count_wire--;
5955 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5956 if (discard) hibernate_discard_page(m);
5957 m = next;
5958 }
5959 }
5960
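/*
 * Compressor pages (the compressed backing for swapped-out pages) are never
 * discarded here; they are counted separately, removed from the wired count,
 * and flagged in page_list_wired so they end up saved with the pageable set.
 */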
5961 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5962 {
5963 count_compressor++;
5964 count_wire--;
5965 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5966 }
5967
5968 if (preflight == FALSE && discard_all == TRUE) {
5969 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5970
5971 HIBLOG("hibernate_teardown started\n");
5972 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5973 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5974
5975 pages -= count_discard_vm_struct_pages;
5976 count_wire -= count_discard_vm_struct_pages;
5977
5978 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5979
5980 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5981 }
5982
5983 if (!preflight) {
5984 // pull wired from hibernate_bitmap
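// For each bank: any page whose bit is still clear in the wired bitmap
// (i.e. it must be saved as wired) gets its bit set in the pageable bitmap,
// so it is not saved a second time in the pageable pass.  In effect this
// computes page_list |= ~page_list_wired, word by word.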
5985 bitmap = &page_list->bank_bitmap[0];
5986 bitmap_wired = &page_list_wired->bank_bitmap[0];
5987 for (bank = 0; bank < page_list->bank_count; bank++)
5988 {
5989 for (i = 0; i < bitmap->bitmapwords; i++)
5990 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5991 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5992 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5993 }
5994 }
5995
5996 // machine dependent adjustments
5997 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5998
5999 if (!preflight) {
6000 hibernate_stats.cd_count_wire = count_wire;
6001 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6002 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6003 }
6004
6005 clock_get_uptime(&end);
6006 absolutetime_to_nanoseconds(end - start, &nsec);
6007 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6008
6009 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d, spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6010 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6011 discard_all ? "did" : "could",
6012 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6013
6014 if (hibernate_stats.cd_skipped_xpmapped)
6015 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6016
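/*
 * pagesOut is the expected image page count: everything still accounted for
 * minus the pages identified above as discardable.  For a preflight that
 * will discard, the pages currently sitting on the pageable queues and in
 * the compressor are subtracted as well, presumably because they will be
 * gone by the time the real (discarding) pass runs.
 */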
6017 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6018
6019 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6020
6021 #if MACH_ASSERT || DEBUG
6022 if (!preflight)
6023 {
6024 if (vm_page_local_q) {
6025 for (i = 0; i < vm_page_local_q_count; i++) {
6026 struct vpl *lq;
6027 lq = &vm_page_local_q[i].vpl_un.vpl;
6028 VPL_UNLOCK(&lq->vpl_lock);
6029 }
6030 }
6031 vm_page_unlock_queues();
6032 }
6033 #endif /* MACH_ASSERT || DEBUG */
6034
6035 if (preflight) {
6036 lck_mtx_unlock(&vm_page_queue_free_lock);
6037 vm_page_unlock_queues();
6038 vm_object_unlock(compressor_object);
6039 }
6040
6041 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6042 }
6043
6044 void
6045 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6046 {
6047 uint64_t start, end, nsec;
6048 vm_page_t m;
6049 vm_page_t next;
6050 uint32_t i;
6051 uint32_t count_discard_active = 0;
6052 uint32_t count_discard_inactive = 0;
6053 uint32_t count_discard_purgeable = 0;
6054 uint32_t count_discard_cleaned = 0;
6055 uint32_t count_discard_speculative = 0;
6056
6057
6058 #if MACH_ASSERT || DEBUG
6059 vm_page_lock_queues();
6060 if (vm_page_local_q) {
6061 for (i = 0; i < vm_page_local_q_count; i++) {
6062 struct vpl *lq;
6063 lq = &vm_page_local_q[i].vpl_un.vpl;
6064 VPL_LOCK(&lq->vpl_lock);
6065 }
6066 }
6067 #endif /* MACH_ASSERT || DEBUG */
6068
6069 clock_get_uptime(&start);
6070
6071 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6072 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6073 {
6074 next = (vm_page_t) m->pageq.next;
6075 if (hibernate_page_bittst(page_list, m->phys_page))
6076 {
6077 if (m->dirty)
6078 count_discard_purgeable++;
6079 else
6080 count_discard_inactive++;
6081 hibernate_discard_page(m);
6082 }
6083 m = next;
6084 }
6085
6086 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6087 {
6088 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6089 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6090 {
6091 next = (vm_page_t) m->pageq.next;
6092 if (hibernate_page_bittst(page_list, m->phys_page))
6093 {
6094 count_discard_speculative++;
6095 hibernate_discard_page(m);
6096 }
6097 m = next;
6098 }
6099 }
6100
6101 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6102 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6103 {
6104 next = (vm_page_t) m->pageq.next;
6105 if (hibernate_page_bittst(page_list, m->phys_page))
6106 {
6107 if (m->dirty)
6108 count_discard_purgeable++;
6109 else
6110 count_discard_inactive++;
6111 hibernate_discard_page(m);
6112 }
6113 m = next;
6114 }
6115
6116 m = (vm_page_t) queue_first(&vm_page_queue_active);
6117 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6118 {
6119 next = (vm_page_t) m->pageq.next;
6120 if (hibernate_page_bittst(page_list, m->phys_page))
6121 {
6122 if (m->dirty)
6123 count_discard_purgeable++;
6124 else
6125 count_discard_active++;
6126 hibernate_discard_page(m);
6127 }
6128 m = next;
6129 }
6130
6131 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6132 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6133 {
6134 next = (vm_page_t) m->pageq.next;
6135 if (hibernate_page_bittst(page_list, m->phys_page))
6136 {
6137 if (m->dirty)
6138 count_discard_purgeable++;
6139 else
6140 count_discard_cleaned++;
6141 hibernate_discard_page(m);
6142 }
6143 m = next;
6144 }
6145
6146 #if MACH_ASSERT || DEBUG
6147 if (vm_page_local_q) {
6148 for (i = 0; i < vm_page_local_q_count; i++) {
6149 struct vpl *lq;
6150 lq = &vm_page_local_q[i].vpl_un.vpl;
6151 VPL_UNLOCK(&lq->vpl_lock);
6152 }
6153 }
6154 vm_page_unlock_queues();
6155 #endif /* MACH_ASSERT || DEBUG */
6156
6157 clock_get_uptime(&end);
6158 absolutetime_to_nanoseconds(end - start, &nsec);
6159 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6160 nsec / 1000000ULL,
6161 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6162 }
6163
6164 boolean_t hibernate_paddr_map_inited = FALSE;
6165 boolean_t hibernate_rebuild_needed = FALSE;
6166 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6167 vm_page_t hibernate_rebuild_hash_list = NULL;
6168
6169 unsigned int hibernate_teardown_found_tabled_pages = 0;
6170 unsigned int hibernate_teardown_found_created_pages = 0;
6171 unsigned int hibernate_teardown_found_free_pages = 0;
6172 unsigned int hibernate_teardown_vm_page_free_count;
6173
6174
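/*
 * Each ppnum_mapping describes one physically contiguous run of vm_page_t
 * entries: vm_pages[ppnm_sindx .. ppnm_eindx) starts at physical page
 * ppnm_base_paddr.  hibernate_create_paddr_map() builds the list once, so
 * that hibernate_lookup_paddr() can recover a page's physical page number
 * from its original vm_pages[] index after the array has been compacted.
 */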
6175 struct ppnum_mapping {
6176 struct ppnum_mapping *ppnm_next;
6177 ppnum_t ppnm_base_paddr;
6178 unsigned int ppnm_sindx;
6179 unsigned int ppnm_eindx;
6180 };
6181
6182 struct ppnum_mapping *ppnm_head;
6183 struct ppnum_mapping *ppnm_last_found = NULL;
6184
6185
6186 void
6187 hibernate_create_paddr_map()
6188 {
6189 unsigned int i;
6190 ppnum_t next_ppnum_in_run = 0;
6191 struct ppnum_mapping *ppnm = NULL;
6192
6193 if (hibernate_paddr_map_inited == FALSE) {
6194
6195 for (i = 0; i < vm_pages_count; i++) {
6196
6197 if (ppnm)
6198 ppnm->ppnm_eindx = i;
6199
6200 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6201
6202 ppnm = kalloc(sizeof(struct ppnum_mapping));
6203
6204 ppnm->ppnm_next = ppnm_head;
6205 ppnm_head = ppnm;
6206
6207 ppnm->ppnm_sindx = i;
6208 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6209 }
6210 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6211 }
6212 ppnm->ppnm_eindx++;
6213
6214 hibernate_paddr_map_inited = TRUE;
6215 }
6216 }
6217
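/*
 * Translate an index into the original vm_pages[] array to its physical
 * page number using the run list above; the last run hit is cached in
 * ppnm_last_found since callers tend to probe consecutive indices.
 * Illustrative use, mirroring hibernate_free_range() below (a sketch, not
 * additional kernel code):
 *
 *	ppnum_t ppn = hibernate_lookup_paddr(sindx);
 *	vm_page_init(&vm_pages[sindx], ppn, FALSE);
 */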
6218 ppnum_t
6219 hibernate_lookup_paddr(unsigned int indx)
6220 {
6221 struct ppnum_mapping *ppnm = NULL;
6222
6223 ppnm = ppnm_last_found;
6224
6225 if (ppnm) {
6226 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6227 goto done;
6228 }
6229 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6230
6231 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6232 ppnm_last_found = ppnm;
6233 break;
6234 }
6235 }
6236 if (ppnm == NULL)
6237 panic("hibernate_lookup_paddr of %d failed\n", indx);
6238 done:
6239 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6240 }
6241
6242
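/*
 * Mark every page lying entirely within [saddr, eaddr) as not needing to be
 * saved, in both bitmaps.  saddr is rounded up and eaddr rounded down to
 * page boundaries, so partially covered pages at either end are still saved.
 * Returns the number of pages marked.
 */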
6243 uint32_t
6244 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6245 {
6246 addr64_t saddr_aligned;
6247 addr64_t eaddr_aligned;
6248 addr64_t addr;
6249 ppnum_t paddr;
6250 unsigned int mark_as_unneeded_pages = 0;
6251
6252 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6253 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6254
6255 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6256
6257 paddr = pmap_find_phys(kernel_pmap, addr);
6258
6259 assert(paddr);
6260
6261 hibernate_page_bitset(page_list, TRUE, paddr);
6262 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6263
6264 mark_as_unneeded_pages++;
6265 }
6266 return (mark_as_unneeded_pages);
6267 }
6268
6269
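/*
 * Re-enter a page into the object/offset hash during the post-hibernation
 * rebuild.  No bucket lock is taken here; this appears to rely on the
 * rebuild running single-threaded, before normal VM activity resumes.
 */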
6270 void
6271 hibernate_hash_insert_page(vm_page_t mem)
6272 {
6273 vm_page_bucket_t *bucket;
6274 int hash_id;
6275
6276 assert(mem->hashed);
6277 assert(mem->object);
6278 assert(mem->offset != (vm_object_offset_t) -1);
6279
6280 /*
6281 * Insert it into the object/offset hash table
6282 */
6283 hash_id = vm_page_hash(mem->object, mem->offset);
6284 bucket = &vm_page_buckets[hash_id];
6285
6286 mem->next_m = bucket->page_list;
6287 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6288 }
6289
6290
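/*
 * Reinitialize vm_pages[sindx .. eindx) as free pages and put them back on
 * the free queues, recovering each entry's physical page number via
 * hibernate_lookup_paddr() because the entries themselves may have been
 * overwritten during compaction.
 */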
6291 void
6292 hibernate_free_range(int sindx, int eindx)
6293 {
6294 vm_page_t mem;
6295 unsigned int color;
6296
6297 while (sindx < eindx) {
6298 mem = &vm_pages[sindx];
6299
6300 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6301
6302 mem->lopage = FALSE;
6303 mem->free = TRUE;
6304
6305 color = mem->phys_page & vm_color_mask;
6306 queue_enter_first(&vm_page_queue_free[color],
6307 mem,
6308 vm_page_t,
6309 pageq);
6310 vm_page_free_count++;
6311
6312 sindx++;
6313 }
6314 }
6315
6316
6317 extern void hibernate_rebuild_pmap_structs(void);
6318
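/*
 * Undo hibernate_teardown_vm_structs() on wake: walk the compacted entries
 * from the highest valid index back down, move each vm_page_t back to the
 * slot recorded in its next_m field, rehash the ones that were hashed, and
 * turn the gaps between them back into runs of free pages.
 */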
6319 void
6320 hibernate_rebuild_vm_structs(void)
6321 {
6322 int cindx, sindx, eindx;
6323 vm_page_t mem, tmem, mem_next;
6324 AbsoluteTime startTime, endTime;
6325 uint64_t nsec;
6326
6327 if (hibernate_rebuild_needed == FALSE)
6328 return;
6329
6330 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6331 HIBLOG("hibernate_rebuild started\n");
6332
6333 clock_get_uptime(&startTime);
6334
6335 hibernate_rebuild_pmap_structs();
6336
6337 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6338 eindx = vm_pages_count;
6339
6340 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6341
6342 mem = &vm_pages[cindx];
6343 /*
6344 * hibernate_teardown_vm_structs leaves the location where
6345 * this vm_page_t must be located in "next_m".
6346 */
6347 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6348 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6349
6350 sindx = (int)(tmem - &vm_pages[0]);
6351
6352 if (mem != tmem) {
6353 /*
6354 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6355 * so move it back to its real location
6356 */
6357 *tmem = *mem;
6358 mem = tmem;
6359 }
6360 if (mem->hashed)
6361 hibernate_hash_insert_page(mem);
6362 /*
6363 * the 'hole' between this vm_page_t and the previous
6364 * vm_page_t we moved needs to be initialized as
6365 * a range of free vm_page_t's
6366 */
6367 hibernate_free_range(sindx + 1, eindx);
6368
6369 eindx = sindx;
6370 }
6371 if (sindx)
6372 hibernate_free_range(0, sindx);
6373
6374 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6375
6376 /*
6377 * process the list of vm_page_t's that were entered in the hash,
6378 * but were not located in the vm_pages array... these are
6379 * vm_page_t's that were created on the fly (i.e. fictitious)
6380 */
6381 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6382 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6383
6384 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6385 hibernate_hash_insert_page(mem);
6386 }
6387 hibernate_rebuild_hash_list = NULL;
6388
6389 clock_get_uptime(&endTime);
6390 SUB_ABSOLUTETIME(&endTime, &startTime);
6391 absolutetime_to_nanoseconds(endTime, &nsec);
6392
6393 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6394
6395 hibernate_rebuild_needed = FALSE;
6396
6397 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6398 }
6399
6400
6401 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6402
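/*
 * Shrink the VM's own footprint in the hibernation image: move pages that
 * live outside the vm_pages[] array (fictitious pages) onto a side list for
 * later rehashing, compact the in-use vm_page_t entries toward the front of
 * vm_pages[], and mark the hash buckets, the unused tail of vm_pages[], and
 * any pmap structures reported by hibernate_teardown_pmap_structs() as not
 * needing to be saved.  Returns the number of pages marked unneeded.
 */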
6403 uint32_t
6404 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6405 {
6406 unsigned int i;
6407 unsigned int compact_target_indx;
6408 vm_page_t mem, mem_next;
6409 vm_page_bucket_t *bucket;
6410 unsigned int mark_as_unneeded_pages = 0;
6411 unsigned int unneeded_vm_page_bucket_pages = 0;
6412 unsigned int unneeded_vm_pages_pages = 0;
6413 unsigned int unneeded_pmap_pages = 0;
6414 addr64_t start_of_unneeded = 0;
6415 addr64_t end_of_unneeded = 0;
6416
6417
6418 if (hibernate_should_abort())
6419 return (0);
6420
6421 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6422 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6423 vm_page_cleaned_count, compressor_object->resident_page_count);
6424
6425 for (i = 0; i < vm_page_bucket_count; i++) {
6426
6427 bucket = &vm_page_buckets[i];
6428
6429 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6430 assert(mem->hashed);
6431
6432 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6433
6434 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6435 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6436 hibernate_rebuild_hash_list = mem;
6437 }
6438 }
6439 }
6440 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6441 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6442
6443 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6444
6445 compact_target_indx = 0;
6446
6447 for (i = 0; i < vm_pages_count; i++) {
6448
6449 mem = &vm_pages[i];
6450
6451 if (mem->free) {
6452 unsigned int color;
6453
6454 assert(mem->busy);
6455 assert(!mem->lopage);
6456
6457 color = mem->phys_page & vm_color_mask;
6458
6459 queue_remove(&vm_page_queue_free[color],
6460 mem,
6461 vm_page_t,
6462 pageq);
6463 mem->pageq.next = NULL;
6464 mem->pageq.prev = NULL;
6465
6466 vm_page_free_count--;
6467
6468 hibernate_teardown_found_free_pages++;
6469
6470 if ( !vm_pages[compact_target_indx].free)
6471 compact_target_indx = i;
6472 } else {
6473 /*
6474 * record this vm_page_t's original location;
6475 * we need this even if it doesn't get moved,
6476 * as an indicator to the rebuild function that
6477 * it doesn't have to be moved back
6478 */
6479 mem->next_m = VM_PAGE_PACK_PTR(mem);
6480
6481 if (vm_pages[compact_target_indx].free) {
6482 /*
6483 * we've got a hole to fill, so
6484 * move this vm_page_t to its new home
6485 */
6486 vm_pages[compact_target_indx] = *mem;
6487 mem->free = TRUE;
6488
6489 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6490 compact_target_indx++;
6491 } else
6492 hibernate_teardown_last_valid_compact_indx = i;
6493 }
6494 }
6495 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6496 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6497 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6498
6499 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6500
6501 if (start_of_unneeded) {
6502 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6503 mark_as_unneeded_pages += unneeded_pmap_pages;
6504 }
6505 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6506
6507 hibernate_rebuild_needed = TRUE;
6508
6509 return (mark_as_unneeded_pages);
6510 }
6511
6512
6513 #endif /* HIBERNATION */
6514
6515 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6516
6517 #include <mach_vm_debug.h>
6518 #if MACH_VM_DEBUG
6519
6520 #include <mach_debug/hash_info.h>
6521 #include <vm/vm_debug.h>
6522
6523 /*
6524 * Routine: vm_page_info
6525 * Purpose:
6526 * Return information about the global VP table.
6527 * Fills the buffer with as much information as possible
6528 * and returns the desired size of the buffer.
6529 * Conditions:
6530 * Nothing locked. The caller should provide
6531 * possibly-pageable memory.
6532 */
6533
6534 unsigned int
6535 vm_page_info(
6536 hash_info_bucket_t *info,
6537 unsigned int count)
6538 {
6539 unsigned int i;
6540 lck_spin_t *bucket_lock;
6541
6542 if (vm_page_bucket_count < count)
6543 count = vm_page_bucket_count;
6544
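/*
 * Each lock in vm_page_bucket_locks covers BUCKETS_PER_LOCK consecutive
 * buckets; the chain is counted under the spin lock and the result is
 * copied into the (possibly pageable) buffer only after the lock has been
 * dropped.
 */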
6545 for (i = 0; i < count; i++) {
6546 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6547 unsigned int bucket_count = 0;
6548 vm_page_t m;
6549
6550 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6551 lck_spin_lock(bucket_lock);
6552
6553 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6554 bucket_count++;
6555
6556 lck_spin_unlock(bucket_lock);
6557
6558 /* don't touch pageable memory while holding locks */
6559 info[i].hib_count = bucket_count;
6560 }
6561
6562 return vm_page_bucket_count;
6563 }
6564 #endif /* MACH_VM_DEBUG */
6565
6566 #if VM_PAGE_BUCKETS_CHECK
6567 void
6568 vm_page_buckets_check(void)
6569 {
6570 unsigned int i;
6571 vm_page_t p;
6572 unsigned int p_hash;
6573 vm_page_bucket_t *bucket;
6574 lck_spin_t *bucket_lock;
6575
6576 if (!vm_page_buckets_check_ready) {
6577 return;
6578 }
6579
6580 #if HIBERNATION
6581 if (hibernate_rebuild_needed ||
6582 hibernate_rebuild_hash_list) {
6583 panic("BUCKET_CHECK: hibernation in progress: "
6584 "rebuild_needed=%d rebuild_hash_list=%p\n",
6585 hibernate_rebuild_needed,
6586 hibernate_rebuild_hash_list);
6587 }
6588 #endif /* HIBERNATION */
6589
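/*
 * The fake-bucket guard region is expected to still hold its 0x5a fill
 * pattern; any other byte means something wrote past the real bucket array.
 */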
6590 #if VM_PAGE_FAKE_BUCKETS
6591 char *cp;
6592 for (cp = (char *) vm_page_fake_buckets_start;
6593 cp < (char *) vm_page_fake_buckets_end;
6594 cp++) {
6595 if (*cp != 0x5a) {
6596 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6597 "[0x%llx:0x%llx]\n",
6598 cp,
6599 (uint64_t) vm_page_fake_buckets_start,
6600 (uint64_t) vm_page_fake_buckets_end);
6601 }
6602 }
6603 #endif /* VM_PAGE_FAKE_BUCKETS */
6604
6605 for (i = 0; i < vm_page_bucket_count; i++) {
6606 bucket = &vm_page_buckets[i];
6607 if (!bucket->page_list) {
6608 continue;
6609 }
6610
6611 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6612 lck_spin_lock(bucket_lock);
6613 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6614 while (p != VM_PAGE_NULL) {
6615 if (!p->hashed) {
6616 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6617 "hash %d in bucket %d at %p "
6618 "is not hashed\n",
6619 p, p->object, p->offset,
6620 p_hash, i, bucket);
6621 }
6622 p_hash = vm_page_hash(p->object, p->offset);
6623 if (p_hash != i) {
6624 panic("BUCKET_CHECK: corruption in bucket %d "
6625 "at %p: page %p object %p offset 0x%llx "
6626 "hash %d\n",
6627 i, bucket, p, p->object, p->offset,
6628 p_hash);
6629 }
6630 p = VM_PAGE_UNPACK_PTR(p->next_m);
6631 }
6632 lck_spin_unlock(bucket_lock);
6633 }
6634
6635 // printf("BUCKET_CHECK: checked buckets\n");
6636 }
6637 #endif /* VM_PAGE_BUCKETS_CHECK */