1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <kern/ledger.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_init.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
86 #include <kern/misc_protos.h>
87 #include <zone_debug.h>
88 #include <vm/cpm.h>
89 #include <pexpert/pexpert.h>
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94 #include <vm/vm_compressor.h>
95
96 #if CONFIG_PHANTOM_CACHE
97 #include <vm/vm_phantom_cache.h>
98 #endif
99
100 #include <IOKit/IOHibernatePrivate.h>
101
102 #include <sys/kdebug.h>
103
104 boolean_t hibernate_cleaning_in_progress = FALSE;
105 boolean_t vm_page_free_verify = TRUE;
106
107 uint32_t vm_lopage_free_count = 0;
108 uint32_t vm_lopage_free_limit = 0;
109 uint32_t vm_lopage_lowater = 0;
110 boolean_t vm_lopage_refill = FALSE;
111 boolean_t vm_lopage_needed = FALSE;
112
113 lck_mtx_ext_t vm_page_queue_lock_ext;
114 lck_mtx_ext_t vm_page_queue_free_lock_ext;
115 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
116
117 int speculative_age_index = 0;
118 int speculative_steal_index = 0;
119 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122 __private_extern__ void vm_page_init_lck_grp(void);
123
124 static void vm_page_free_prepare(vm_page_t page);
125 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
130 /*
131 * Associated with each page of user-allocatable memory is a
132 * page structure.
133 */
134
135 /*
136 * These variables record the values returned by vm_page_bootstrap,
137 * for debugging purposes. The implementation of pmap_steal_memory
138 * and pmap_startup here also uses them internally.
139 */
140
141 vm_offset_t virtual_space_start;
142 vm_offset_t virtual_space_end;
143 uint32_t vm_page_pages;
144
145 /*
146 * The vm_page_lookup() routine, which provides for fast
147 * (virtual memory object, offset) to page lookup, employs
148 * the following hash table. The vm_page_{insert,remove}
149 * routines install and remove associations in the table.
150 * [This table is often called the virtual-to-physical,
151 * or VP, table.]
152 */
153 typedef struct {
154 vm_page_packed_t page_list;
155 #if MACH_PAGE_HASH_STATS
156 int cur_count; /* current count */
157 int hi_count; /* high water mark */
158 #endif /* MACH_PAGE_HASH_STATS */
159 } vm_page_bucket_t;
160
161
162 #define BUCKETS_PER_LOCK 16
163
164 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
165 unsigned int vm_page_bucket_count = 0; /* How big is array? */
166 unsigned int vm_page_hash_mask; /* Mask for hash function */
167 unsigned int vm_page_hash_shift; /* Shift for hash function */
168 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
169 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
170
171 lck_spin_t *vm_page_bucket_locks;
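/*
 * Illustrative note (not part of the original source): hash buckets share
 * spin locks in groups of BUCKETS_PER_LOCK, so the lock protecting a bucket
 * is found by integer division of the hash index. For example, with
 * BUCKETS_PER_LOCK == 16, buckets 0..15 map to vm_page_bucket_locks[0],
 * buckets 16..31 to vm_page_bucket_locks[1], and a hypothetical hash_id of
 * 37 maps to lock index 37 / 16 == 2.
 */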
172
173 #if VM_PAGE_BUCKETS_CHECK
174 boolean_t vm_page_buckets_check_ready = FALSE;
175 #if VM_PAGE_FAKE_BUCKETS
176 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
177 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178 #endif /* VM_PAGE_FAKE_BUCKETS */
179 #endif /* VM_PAGE_BUCKETS_CHECK */
180
181 #if MACH_PAGE_HASH_STATS
182 /* This routine is for debugging only. It is intended to be called by
183 * hand by a developer using a kernel debugger. It prints
184 * vm_page_hash table statistics to the kernel debug console.
185 */
186 void
187 hash_debug(void)
188 {
189 int i;
190 int numbuckets = 0;
191 int highsum = 0;
192 int maxdepth = 0;
193
194 for (i = 0; i < vm_page_bucket_count; i++) {
195 if (vm_page_buckets[i].hi_count) {
196 numbuckets++;
197 highsum += vm_page_buckets[i].hi_count;
198 if (vm_page_buckets[i].hi_count > maxdepth)
199 maxdepth = vm_page_buckets[i].hi_count;
200 }
201 }
202 printf("Total number of buckets: %d\n", vm_page_bucket_count);
203 printf("Number used buckets: %d = %d%%\n",
204 numbuckets, 100*numbuckets/vm_page_bucket_count);
205 printf("Number unused buckets: %d = %d%%\n",
206 vm_page_bucket_count - numbuckets,
207 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208 printf("Sum of bucket max depth: %d\n", highsum);
209 printf("Average bucket depth: %d.%2d\n",
210 highsum/vm_page_bucket_count,
211 highsum%vm_page_bucket_count);
212 printf("Maximum bucket depth: %d\n", maxdepth);
213 }
214 #endif /* MACH_PAGE_HASH_STATS */
215
216 /*
217 * The virtual page size is currently implemented as a runtime
218 * variable, but is constant once initialized using vm_set_page_size.
219 * This initialization must be done in the machine-dependent
220 * bootstrap sequence, before calling other machine-independent
221 * initializations.
222 *
223 * All references to the virtual page size outside this
224 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 * constants.
226 */
227 vm_size_t page_size = PAGE_SIZE;
228 vm_size_t page_mask = PAGE_MASK;
229 int page_shift = PAGE_SHIFT;
230
231 /*
232 * Resident page structures are initialized from
233 * a template (see vm_page_alloc).
234 *
235 * When adding a new field to the virtual memory
236 * object structure, be sure to add initialization
237 * (see vm_page_bootstrap).
238 */
239 struct vm_page vm_page_template;
240
241 vm_page_t vm_pages = VM_PAGE_NULL;
242 unsigned int vm_pages_count = 0;
243 ppnum_t vm_page_lowest = 0;
244
245 /*
246 * Resident pages that represent real memory
247 * are allocated from a set of free lists,
248 * one per color.
249 */
250 unsigned int vm_colors;
251 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
252 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
253 unsigned int vm_free_magazine_refill_limit = 0;
254 queue_head_t vm_page_queue_free[MAX_COLORS];
255 unsigned int vm_page_free_wanted;
256 unsigned int vm_page_free_wanted_privileged;
257 unsigned int vm_page_free_count;
258 unsigned int vm_page_fictitious_count;
259
260 /*
261 * Occasionally, the virtual memory system uses
262 * resident page structures that do not refer to
263 * real pages, for example to leave a page with
264 * important state information in the VP table.
265 *
266 * These page structures are allocated the way
267 * most other kernel structures are.
268 */
269 zone_t vm_page_zone;
270 vm_locks_array_t vm_page_locks;
271 decl_lck_mtx_data(,vm_page_alloc_lock)
272 lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274 unsigned int io_throttle_zero_fill;
275
276 unsigned int vm_page_local_q_count = 0;
277 unsigned int vm_page_local_q_soft_limit = 250;
278 unsigned int vm_page_local_q_hard_limit = 500;
279 struct vplq *vm_page_local_q = NULL;
280
281 /* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284 /*
285 * Fictitious pages don't have a physical address,
286 * but we must initialize phys_page to something.
287 * For debugging, this should be a strange value
288 * that the pmap module can recognize in assertions.
289 */
290 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292 /*
293 * Guard pages are not accessible so they don't
294 * need a physical address, but we need to enter
295 * one in the pmap.
296 * Let's make it recognizable and make sure that
297 * we don't use a real physical page with that
298 * physical address.
299 */
300 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302 /*
303 * Resident page structures are also chained on
304 * queues that are used by the page replacement
305 * system (pageout daemon). These queues are
306 * defined here, but are shared by the pageout
307 * module. The inactive queue is broken into
308 * file backed and anonymous for convenience as the
309 * pageout daemon often assigns a higher
310 * importance to anonymous pages (less likely to pick)
311 */
312 queue_head_t vm_page_queue_active;
313 queue_head_t vm_page_queue_inactive;
314 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
315 queue_head_t vm_page_queue_throttled;
316
317 unsigned int vm_page_active_count;
318 unsigned int vm_page_inactive_count;
319 unsigned int vm_page_anonymous_count;
320 unsigned int vm_page_throttled_count;
321 unsigned int vm_page_speculative_count;
322 unsigned int vm_page_wire_count;
323 unsigned int vm_page_wire_count_initial;
324 unsigned int vm_page_gobble_count = 0;
325
326 #define VM_PAGE_WIRE_COUNT_WARNING 0
327 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
328
329 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
330 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
332
333 unsigned int vm_page_xpmapped_external_count = 0;
334 unsigned int vm_page_external_count = 0;
335 unsigned int vm_page_internal_count = 0;
336 unsigned int vm_page_pageable_external_count = 0;
337 unsigned int vm_page_pageable_internal_count = 0;
338
339 #if DEVELOPMENT || DEBUG
340 unsigned int vm_page_speculative_recreated = 0;
341 unsigned int vm_page_speculative_created = 0;
342 unsigned int vm_page_speculative_used = 0;
343 #endif
344
345 queue_head_t vm_page_queue_cleaned;
346
347 unsigned int vm_page_cleaned_count = 0;
348 unsigned int vm_pageout_enqueued_cleaned = 0;
349
350 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
351 ppnum_t max_valid_low_ppnum = 0xffffffff;
352
353
354 /*
355 * Several page replacement parameters are also
356 * shared with this module, so that page allocation
357 * (done here in vm_page_alloc) can trigger the
358 * pageout daemon.
359 */
360 unsigned int vm_page_free_target = 0;
361 unsigned int vm_page_free_min = 0;
362 unsigned int vm_page_throttle_limit = 0;
363 unsigned int vm_page_inactive_target = 0;
364 unsigned int vm_page_anonymous_min = 0;
365 unsigned int vm_page_inactive_min = 0;
366 unsigned int vm_page_free_reserved = 0;
367 unsigned int vm_page_throttle_count = 0;
368
369
370 /*
371 * The VM system has a couple of heuristics for deciding
372 * that pages are "uninteresting" and should be placed
373 * on the inactive queue as likely candidates for replacement.
374 * These variables let the heuristics be controlled at run-time
375 * to make experimentation easier.
376 */
377
378 boolean_t vm_page_deactivate_hint = TRUE;
379
380 struct vm_page_stats_reusable vm_page_stats_reusable;
381
382 /*
383 * vm_set_page_size:
384 *
385 * Sets the page size, perhaps based upon the memory
386 * size. Must be called before any use of page-size
387 * dependent functions.
388 *
389 * Sets page_shift and page_mask from page_size.
390 */
391 void
392 vm_set_page_size(void)
393 {
394 page_size = PAGE_SIZE;
395 page_mask = PAGE_MASK;
396 page_shift = PAGE_SHIFT;
397
398 if ((page_mask & page_size) != 0)
399 panic("vm_set_page_size: page size not a power of two");
400
401 for (page_shift = 0; ; page_shift++)
402 if ((1U << page_shift) == page_size)
403 break;
404 }
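/*
 * Worked example (not part of the original source): with the common 4 KB
 * page size, page_size == 4096 and page_mask == 0xFFF, so the power-of-two
 * check passes (0xFFF & 0x1000 == 0) and the loop above stops at
 * page_shift == 12, since (1U << 12) == 4096.
 */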
405
406 #define COLOR_GROUPS_TO_STEAL 4
407
408
409 /* Called once during startup, once the cache geometry is known.
410 */
411 static void
412 vm_page_set_colors( void )
413 {
414 unsigned int n, override;
415
416 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
417 n = override;
418 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
419 n = vm_cache_geometry_colors;
420 else n = DEFAULT_COLORS; /* use default if all else fails */
421
422 if ( n == 0 )
423 n = 1;
424 if ( n > MAX_COLORS )
425 n = MAX_COLORS;
426
427 /* the count must be a power of 2 */
428 if ( ( n & (n - 1)) != 0 )
429 panic("vm_page_set_colors");
430
431 vm_colors = n;
432 vm_color_mask = n - 1;
433
434 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
435 }
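/*
 * Worked example (hypothetical cache geometry, not part of the original
 * source): if the platform code reported vm_cache_geometry_colors == 32,
 * the code above would set vm_colors == 32, vm_color_mask == 0x1f and,
 * with COLOR_GROUPS_TO_STEAL == 4, vm_free_magazine_refill_limit == 128.
 * Requiring a power-of-two color count lets a page's free-list color be
 * derived from its physical page number with a single mask operation.
 */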
436
437
438 lck_grp_t vm_page_lck_grp_free;
439 lck_grp_t vm_page_lck_grp_queue;
440 lck_grp_t vm_page_lck_grp_local;
441 lck_grp_t vm_page_lck_grp_purge;
442 lck_grp_t vm_page_lck_grp_alloc;
443 lck_grp_t vm_page_lck_grp_bucket;
444 lck_grp_attr_t vm_page_lck_grp_attr;
445 lck_attr_t vm_page_lck_attr;
446
447
448 __private_extern__ void
449 vm_page_init_lck_grp(void)
450 {
451 /*
452 * initialize the vm_page lock world
453 */
454 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
455 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
456 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
457 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
458 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
459 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
460 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
461 lck_attr_setdefault(&vm_page_lck_attr);
462 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
463
464 vm_compressor_init_locks();
465 }
466
467 void
468 vm_page_init_local_q()
469 {
470 unsigned int num_cpus;
471 unsigned int i;
472 struct vplq *t_local_q;
473
474 num_cpus = ml_get_max_cpus();
475
476 /*
477 * no point in this for a uni-processor system
478 */
479 if (num_cpus >= 2) {
480 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
481
482 for (i = 0; i < num_cpus; i++) {
483 struct vpl *lq;
484
485 lq = &t_local_q[i].vpl_un.vpl;
486 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
487 queue_init(&lq->vpl_queue);
488 lq->vpl_count = 0;
489 lq->vpl_internal_count = 0;
490 lq->vpl_external_count = 0;
491 }
492 vm_page_local_q_count = num_cpus;
493
494 vm_page_local_q = (struct vplq *)t_local_q;
495 }
496 }
497
498
499 /*
500 * vm_page_bootstrap:
501 *
502 * Initializes the resident memory module.
503 *
504 * Allocates memory for the page cells, and
505 * for the object/offset-to-page hash table headers.
506 * Each page cell is initialized and placed on the free list.
507 * Returns the range of available kernel virtual memory.
508 */
509
510 void
511 vm_page_bootstrap(
512 vm_offset_t *startp,
513 vm_offset_t *endp)
514 {
515 register vm_page_t m;
516 unsigned int i;
517 unsigned int log1;
518 unsigned int log2;
519 unsigned int size;
520
521 /*
522 * Initialize the vm_page template.
523 */
524
525 m = &vm_page_template;
526 bzero(m, sizeof (*m));
527
528 m->pageq.next = NULL;
529 m->pageq.prev = NULL;
530 m->listq.next = NULL;
531 m->listq.prev = NULL;
532 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
533
534 m->object = VM_OBJECT_NULL; /* reset later */
535 m->offset = (vm_object_offset_t) -1; /* reset later */
536
537 m->wire_count = 0;
538 m->local = FALSE;
539 m->inactive = FALSE;
540 m->active = FALSE;
541 m->pageout_queue = FALSE;
542 m->speculative = FALSE;
543 m->laundry = FALSE;
544 m->free = FALSE;
545 m->reference = FALSE;
546 m->gobbled = FALSE;
547 m->private = FALSE;
548 m->throttled = FALSE;
549 m->__unused_pageq_bits = 0;
550
551 m->phys_page = 0; /* reset later */
552
553 m->busy = TRUE;
554 m->wanted = FALSE;
555 m->tabled = FALSE;
556 m->hashed = FALSE;
557 m->fictitious = FALSE;
558 m->pmapped = FALSE;
559 m->wpmapped = FALSE;
560 m->pageout = FALSE;
561 m->absent = FALSE;
562 m->error = FALSE;
563 m->dirty = FALSE;
564 m->cleaning = FALSE;
565 m->precious = FALSE;
566 m->clustered = FALSE;
567 m->overwriting = FALSE;
568 m->restart = FALSE;
569 m->unusual = FALSE;
570 m->encrypted = FALSE;
571 m->encrypted_cleaning = FALSE;
572 m->cs_validated = FALSE;
573 m->cs_tainted = FALSE;
574 m->cs_nx = FALSE;
575 m->no_cache = FALSE;
576 m->reusable = FALSE;
577 m->slid = FALSE;
578 m->xpmapped = FALSE;
579 m->compressor = FALSE;
580 m->written_by_kernel = FALSE;
581 m->__unused_object_bits = 0;
582
583 /*
584 * Initialize the page queues.
585 */
586 vm_page_init_lck_grp();
587
588 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
589 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
590 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
591
592 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
593 int group;
594
595 purgeable_queues[i].token_q_head = 0;
596 purgeable_queues[i].token_q_tail = 0;
597 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
598 queue_init(&purgeable_queues[i].objq[group]);
599
600 purgeable_queues[i].type = i;
601 purgeable_queues[i].new_pages = 0;
602 #if MACH_ASSERT
603 purgeable_queues[i].debug_count_tokens = 0;
604 purgeable_queues[i].debug_count_objects = 0;
605 #endif
606 };
607 purgeable_nonvolatile_count = 0;
608 queue_init(&purgeable_nonvolatile_queue);
609
610 for (i = 0; i < MAX_COLORS; i++ )
611 queue_init(&vm_page_queue_free[i]);
612
613 queue_init(&vm_lopage_queue_free);
614 queue_init(&vm_page_queue_active);
615 queue_init(&vm_page_queue_inactive);
616 queue_init(&vm_page_queue_cleaned);
617 queue_init(&vm_page_queue_throttled);
618 queue_init(&vm_page_queue_anonymous);
619
620 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
621 queue_init(&vm_page_queue_speculative[i].age_q);
622
623 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
624 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
625 }
626 vm_page_free_wanted = 0;
627 vm_page_free_wanted_privileged = 0;
628
629 vm_page_set_colors();
630
631
632 /*
633 * Steal memory for the map and zone subsystems.
634 */
635 kernel_debug_string("zone_steal_memory");
636 zone_steal_memory();
637 kernel_debug_string("vm_map_steal_memory");
638 vm_map_steal_memory();
639
640 /*
641 * Allocate (and initialize) the virtual-to-physical
642 * table hash buckets.
643 *
644 * The number of buckets should be a power of two to
645 * get a good hash function. The following computation
646 * chooses the first power of two that is greater
647 * than the number of physical pages in the system.
648 */
649
650 if (vm_page_bucket_count == 0) {
651 unsigned int npages = pmap_free_pages();
652
653 vm_page_bucket_count = 1;
654 while (vm_page_bucket_count < npages)
655 vm_page_bucket_count <<= 1;
656 }
657 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
658
659 vm_page_hash_mask = vm_page_bucket_count - 1;
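/*
 * Worked example (hypothetical page count, not part of the original
 * source): if pmap_free_pages() returned 1,000,000, the loop above would
 * stop at vm_page_bucket_count == 1,048,576 (2^20), giving
 * vm_page_hash_mask == 0xFFFFF and, with BUCKETS_PER_LOCK == 16,
 * vm_page_bucket_lock_count == 65,536.
 */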
660
661 /*
662 * Calculate object shift value for hashing algorithm:
663 * O = log2(sizeof(struct vm_object))
664 * B = log2(vm_page_bucket_count)
665 * hash shifts the object left by
666 * B/2 - O
667 */
668 size = vm_page_bucket_count;
669 for (log1 = 0; size > 1; log1++)
670 size /= 2;
671 size = sizeof(struct vm_object);
672 for (log2 = 0; size > 1; log2++)
673 size /= 2;
674 vm_page_hash_shift = log1/2 - log2 + 1;
675
676 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
677 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
678 vm_page_bucket_hash |= 1; /* Set bit and add 1 - must always be 1 to ensure a unique series */
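/*
 * Worked example (hypothetical sizes, not part of the original source):
 * continuing the 2^20 bucket example, log1 == 20; if sizeof(struct
 * vm_object) were 256 bytes, log2 == 8 and
 * vm_page_hash_shift == 20/2 - 8 + 1 == 3. The seed computed above would
 * then be (1 << 10) | (1 << 5) | 1 == 0x421.
 */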
679
680 if (vm_page_hash_mask & vm_page_bucket_count)
681 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
682
683 #if VM_PAGE_BUCKETS_CHECK
684 #if VM_PAGE_FAKE_BUCKETS
685 /*
686 * Allocate a decoy set of page buckets, to detect
687 * any stomping there.
688 */
689 vm_page_fake_buckets = (vm_page_bucket_t *)
690 pmap_steal_memory(vm_page_bucket_count *
691 sizeof(vm_page_bucket_t));
692 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
693 vm_page_fake_buckets_end =
694 vm_map_round_page((vm_page_fake_buckets_start +
695 (vm_page_bucket_count *
696 sizeof (vm_page_bucket_t))),
697 PAGE_MASK);
698 char *cp;
699 for (cp = (char *)vm_page_fake_buckets_start;
700 cp < (char *)vm_page_fake_buckets_end;
701 cp++) {
702 *cp = 0x5a;
703 }
704 #endif /* VM_PAGE_FAKE_BUCKETS */
705 #endif /* VM_PAGE_BUCKETS_CHECK */
706
707 kernel_debug_string("vm_page_buckets");
708 vm_page_buckets = (vm_page_bucket_t *)
709 pmap_steal_memory(vm_page_bucket_count *
710 sizeof(vm_page_bucket_t));
711
712 kernel_debug_string("vm_page_bucket_locks");
713 vm_page_bucket_locks = (lck_spin_t *)
714 pmap_steal_memory(vm_page_bucket_lock_count *
715 sizeof(lck_spin_t));
716
717 for (i = 0; i < vm_page_bucket_count; i++) {
718 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
719
720 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
721 #if MACH_PAGE_HASH_STATS
722 bucket->cur_count = 0;
723 bucket->hi_count = 0;
724 #endif /* MACH_PAGE_HASH_STATS */
725 }
726
727 for (i = 0; i < vm_page_bucket_lock_count; i++)
728 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
729
730 #if VM_PAGE_BUCKETS_CHECK
731 vm_page_buckets_check_ready = TRUE;
732 #endif /* VM_PAGE_BUCKETS_CHECK */
733
734 /*
735 * Machine-dependent code allocates the resident page table.
736 * It uses vm_page_init to initialize the page frames.
737 * The code also returns to us the virtual space available
738 * to the kernel. We don't trust the pmap module
739 * to get the alignment right.
740 */
741
742 kernel_debug_string("pmap_startup");
743 pmap_startup(&virtual_space_start, &virtual_space_end);
744 virtual_space_start = round_page(virtual_space_start);
745 virtual_space_end = trunc_page(virtual_space_end);
746
747 *startp = virtual_space_start;
748 *endp = virtual_space_end;
749
750 /*
751 * Compute the initial "wire" count.
752 * Up until now, the pages which have been set aside are not under
753 * the VM system's control, so although they aren't explicitly
754 * wired, they nonetheless can't be moved. At this moment,
755 * all VM managed pages are "free", courtesy of pmap_startup.
756 */
757 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
758 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
759 vm_page_wire_count_initial = vm_page_wire_count;
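/*
 * Worked example (hypothetical numbers, not part of the original source):
 * on a machine with 4 GB of memory and 4 KB pages,
 * atop_64(max_mem) == 1,048,576; if 1,000,000 of those pages were released
 * to the free list and none went to the lopage queue, the initial
 * vm_page_wire_count would be 48,576.
 */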
760
761 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
762 vm_page_free_count, vm_page_wire_count);
763
764 kernel_debug_string("vm_page_bootstrap complete");
765 simple_lock_init(&vm_paging_lock, 0);
766 }
767
768 #ifndef MACHINE_PAGES
769 /*
770 * We implement pmap_steal_memory and pmap_startup with the help
771 * of two simpler functions, pmap_virtual_space and pmap_next_page.
772 */
773
774 void *
775 pmap_steal_memory(
776 vm_size_t size)
777 {
778 vm_offset_t addr, vaddr;
779 ppnum_t phys_page;
780
781 /*
782 * We round the size up to a multiple of the pointer size.
783 */
784
785 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
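/*
 * Worked example (not part of the original source): on LP64,
 * sizeof(void *) == 8, so a request for 60 bytes is rounded up to
 * (60 + 7) & ~7 == 64, while a request that is already a multiple of 8
 * passes through unchanged.
 */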
786
787 /*
788 * If this is the first call to pmap_steal_memory,
789 * we have to initialize ourself.
790 */
791
792 if (virtual_space_start == virtual_space_end) {
793 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
794
795 /*
796 * The initial values must be aligned properly, and
797 * we don't trust the pmap module to do it right.
798 */
799
800 virtual_space_start = round_page(virtual_space_start);
801 virtual_space_end = trunc_page(virtual_space_end);
802 }
803
804 /*
805 * Allocate virtual memory for this request.
806 */
807
808 addr = virtual_space_start;
809 virtual_space_start += size;
810
811 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
812
813 /*
814 * Allocate and map physical pages to back new virtual pages.
815 */
816
817 for (vaddr = round_page(addr);
818 vaddr < addr + size;
819 vaddr += PAGE_SIZE) {
820
821 if (!pmap_next_page_hi(&phys_page))
822 panic("pmap_steal_memory");
823
824 /*
825 * XXX Logically, these mappings should be wired,
826 * but some pmap modules barf if they are.
827 */
828 #if defined(__LP64__)
829 pmap_pre_expand(kernel_pmap, vaddr);
830 #endif
831
832 pmap_enter(kernel_pmap, vaddr, phys_page,
833 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
834 VM_WIMG_USE_DEFAULT, FALSE);
835 /*
836 * Account for newly stolen memory
837 */
838 vm_page_wire_count++;
839
840 }
841
842 return (void *) addr;
843 }
844
845 void vm_page_release_startup(vm_page_t mem);
846 void
847 pmap_startup(
848 vm_offset_t *startp,
849 vm_offset_t *endp)
850 {
851 unsigned int i, npages, pages_initialized, fill, fillval;
852 ppnum_t phys_page;
853 addr64_t tmpaddr;
854
855
856 #if defined(__LP64__)
857 /*
858 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
859 */
860 assert(sizeof(struct vm_page) == 64);
861
862 /*
863 * make sure we are aligned on a 64 byte boundary
864 * for VM_PAGE_PACK_PTR (it clips off the low-order
865 * 6 bits of the pointer)
866 */
867 if (virtual_space_start != virtual_space_end)
868 virtual_space_start = round_page(virtual_space_start);
869 #endif
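/*
 * A generic sketch of the pointer-packing idea only -- the real
 * VM_PAGE_PACK_PTR/VM_PAGE_UNPACK_PTR definitions live in the VM headers
 * and may differ. Because every element is 64-byte aligned, the low 6 bits
 * of its offset from a known base are zero, so the offset can be stored
 * right-shifted in a narrower integer. The helpers below are hypothetical
 * and for illustration only.
 */
#if 0 /* illustrative only */
static uint32_t
example_pack_ptr(const void *base, const void *p)
{
	/* discard the six always-zero low bits of the offset */
	return (uint32_t)(((uintptr_t)p - (uintptr_t)base) >> 6);
}

static void *
example_unpack_ptr(const void *base, uint32_t packed)
{
	/* restore the full pointer by shifting the offset back */
	return (void *)((uintptr_t)base + ((uintptr_t)packed << 6));
}
#endif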
870
871 /*
872 * We calculate how many page frames we will have
873 * and then allocate the page structures in one chunk.
874 */
875
876 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
877 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
878 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many page frames fit once each also gets a vm_page_t */
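/*
 * Worked example (hypothetical numbers, not part of the original source):
 * each managed page needs PAGE_SIZE bytes of backing memory plus one
 * 64-byte struct vm_page, hence the combined divisor above. With 1 GB of
 * memory left and 4 KB pages, npages == 0x40000000 / (4096 + 64) == 258,111.
 */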
879
880 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
881
882 /*
883 * Initialize the page frames.
884 */
885 kernel_debug_string("Initialize the page frames");
886 for (i = 0, pages_initialized = 0; i < npages; i++) {
887 if (!pmap_next_page(&phys_page))
888 break;
889 if (pages_initialized == 0 || phys_page < vm_page_lowest)
890 vm_page_lowest = phys_page;
891
892 vm_page_init(&vm_pages[i], phys_page, FALSE);
893 vm_page_pages++;
894 pages_initialized++;
895 }
896 vm_pages_count = pages_initialized;
897
898 #if defined(__LP64__)
899
900 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
901 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
902
903 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
904 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
905 #endif
906 kernel_debug_string("page fill/release");
907 /*
908 * Check if we want to initialize pages to a known value
909 */
910 fill = 0; /* Assume no fill */
911 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
912 #if DEBUG
913 /* This slows down booting the DEBUG kernel, particularly on
914 * large memory systems, but is worthwhile in deterministically
915 * trapping uninitialized memory usage.
916 */
917 if (fill == 0) {
918 fill = 1;
919 fillval = 0xDEB8F177;
920 }
921 #endif
922 if (fill)
923 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
924 // -debug code remove
925 if (2 == vm_himemory_mode) {
926 // free low -> high so high is preferred
927 for (i = 1; i <= pages_initialized; i++) {
928 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
929 vm_page_release_startup(&vm_pages[i - 1]);
930 }
931 }
932 else
933 // debug code remove-
934
935 /*
936 * Release pages in reverse order so that physical pages
937 * initially get allocated in ascending addresses. This keeps
938 * the devices (which must address physical memory) happy if
939 * they require several consecutive pages.
940 */
941 for (i = pages_initialized; i > 0; i--) {
942 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
943 vm_page_release_startup(&vm_pages[i - 1]);
944 }
945
946 VM_CHECK_MEMORYSTATUS;
947
948 #if 0
949 {
950 vm_page_t xx, xxo, xxl;
951 int i, j, k, l;
952
953 j = 0; /* (BRINGUP) */
954 xxl = 0;
955
956 for( i = 0; i < vm_colors; i++ ) {
957 queue_iterate(&vm_page_queue_free[i],
958 xx,
959 vm_page_t,
960 pageq) { /* BRINGUP */
961 j++; /* (BRINGUP) */
962 if(j > vm_page_free_count) { /* (BRINGUP) */
963 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
964 }
965
966 l = vm_page_free_count - j; /* (BRINGUP) */
967 k = 0; /* (BRINGUP) */
968
969 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
970
971 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
972 k++;
973 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
974 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
975 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
976 }
977 }
978
979 xxl = xx;
980 }
981 }
982
983 if(j != vm_page_free_count) { /* (BRINGUP) */
984 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
985 }
986 }
987 #endif
988
989
990 /*
991 * We have to re-align virtual_space_start,
992 * because pmap_steal_memory has been using it.
993 */
994
995 virtual_space_start = round_page(virtual_space_start);
996
997 *startp = virtual_space_start;
998 *endp = virtual_space_end;
999 }
1000 #endif /* MACHINE_PAGES */
1001
1002 /*
1003 * Routine: vm_page_module_init
1004 * Purpose:
1005 * Second initialization pass, to be done after
1006 * the basic VM system is ready.
1007 */
1008 void
1009 vm_page_module_init(void)
1010 {
1011 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1012 0, PAGE_SIZE, "vm pages");
1013
1014 #if ZONE_DEBUG
1015 zone_debug_disable(vm_page_zone);
1016 #endif /* ZONE_DEBUG */
1017
1018 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1019 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1020 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1021 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1022 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1023 /*
1024 * Adjust zone statistics to account for the real pages allocated
1025 * in vm_page_create(). [Q: is this really what we want?]
1026 */
1027 vm_page_zone->count += vm_page_pages;
1028 vm_page_zone->sum_count += vm_page_pages;
1029 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1030 }
1031
1032 /*
1033 * Routine: vm_page_create
1034 * Purpose:
1035 * After the VM system is up, machine-dependent code
1036 * may stumble across more physical memory. For example,
1037 * memory that it was reserving for a frame buffer.
1038 * vm_page_create turns this memory into available pages.
1039 */
1040
1041 void
1042 vm_page_create(
1043 ppnum_t start,
1044 ppnum_t end)
1045 {
1046 ppnum_t phys_page;
1047 vm_page_t m;
1048
1049 for (phys_page = start;
1050 phys_page < end;
1051 phys_page++) {
1052 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1053 == VM_PAGE_NULL)
1054 vm_page_more_fictitious();
1055
1056 m->fictitious = FALSE;
1057 pmap_clear_noencrypt(phys_page);
1058
1059 vm_page_pages++;
1060 vm_page_release(m);
1061 }
1062 }
1063
1064 /*
1065 * vm_page_hash:
1066 *
1067 * Distributes the object/offset key pair among hash buckets.
1068 *
1069 * NOTE: The bucket count must be a power of 2
1070 */
1071 #define vm_page_hash(object, offset) (\
1072 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1073 & vm_page_hash_mask)
1074
1075
1076 /*
1077 * vm_page_insert: [ internal use only ]
1078 *
1079 * Inserts the given mem entry into the object/object-page
1080 * table and object list.
1081 *
1082 * The object must be locked.
1083 */
1084 void
1085 vm_page_insert(
1086 vm_page_t mem,
1087 vm_object_t object,
1088 vm_object_offset_t offset)
1089 {
1090 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1091 }
1092
1093 void
1094 vm_page_insert_internal(
1095 vm_page_t mem,
1096 vm_object_t object,
1097 vm_object_offset_t offset,
1098 boolean_t queues_lock_held,
1099 boolean_t insert_in_hash,
1100 boolean_t batch_pmap_op)
1101 {
1102 vm_page_bucket_t *bucket;
1103 lck_spin_t *bucket_lock;
1104 int hash_id;
1105 task_t owner;
1106
1107 XPR(XPR_VM_PAGE,
1108 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1109 object, offset, mem, 0,0);
1110 #if 0
1111 /*
1112 * we may not hold the page queue lock
1113 * so this check isn't safe to make
1114 */
1115 VM_PAGE_CHECK(mem);
1116 #endif
1117
1118 assert(page_aligned(offset));
1119
1120 /* the vm_submap_object is only a placeholder for submaps */
1121 assert(object != vm_submap_object);
1122
1123 vm_object_lock_assert_exclusive(object);
1124 #if DEBUG
1125 lck_mtx_assert(&vm_page_queue_lock,
1126 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1127 : LCK_MTX_ASSERT_NOTOWNED);
1128 #endif /* DEBUG */
1129
1130 if (insert_in_hash == TRUE) {
1131 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1132 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1133 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1134 "already in (obj=%p,off=0x%llx)",
1135 mem, object, offset, mem->object, mem->offset);
1136 #endif
1137 assert(!object->internal || offset < object->vo_size);
1138
1139 /* only insert "pageout" pages into "pageout" objects,
1140 * and normal pages into normal objects */
1141 assert(object->pageout == mem->pageout);
1142
1143 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1144
1145 /*
1146 * Record the object/offset pair in this page
1147 */
1148
1149 mem->object = object;
1150 mem->offset = offset;
1151
1152 /*
1153 * Insert it into the object_object/offset hash table
1154 */
1155 hash_id = vm_page_hash(object, offset);
1156 bucket = &vm_page_buckets[hash_id];
1157 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1158
1159 lck_spin_lock(bucket_lock);
1160
1161 mem->next_m = bucket->page_list;
1162 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1163 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1164
1165 #if MACH_PAGE_HASH_STATS
1166 if (++bucket->cur_count > bucket->hi_count)
1167 bucket->hi_count = bucket->cur_count;
1168 #endif /* MACH_PAGE_HASH_STATS */
1169 mem->hashed = TRUE;
1170 lck_spin_unlock(bucket_lock);
1171 }
1172
1173 {
1174 unsigned int cache_attr;
1175
1176 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1177
1178 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1179 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1180 }
1181 }
1182 /*
1183 * Now link into the object's list of backed pages.
1184 */
1185 VM_PAGE_INSERT(mem, object);
1186 mem->tabled = TRUE;
1187
1188 /*
1189 * Show that the object has one more resident page.
1190 */
1191
1192 object->resident_page_count++;
1193 if (VM_PAGE_WIRED(mem)) {
1194 object->wired_page_count++;
1195 }
1196 assert(object->resident_page_count >= object->wired_page_count);
1197
1198 if (object->internal) {
1199 OSAddAtomic(1, &vm_page_internal_count);
1200 } else {
1201 OSAddAtomic(1, &vm_page_external_count);
1202 }
1203
1204 /*
1205 * It wouldn't make sense to insert a "reusable" page in
1206 * an object (the page would have been marked "reusable" only
1207 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1208 * in the object at that time).
1209 * But a page could be inserted in an "all_reusable" object, if
1210 * something faults it in (a vm_read() from another task or a
1211 * "use-after-free" issue in user space, for example). It can
1212 * also happen if we're relocating a page from that object to
1213 * a different physical page during a physically-contiguous
1214 * allocation.
1215 */
1216 assert(!mem->reusable);
1217 if (mem->object->all_reusable) {
1218 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1219 }
1220
1221 if (object->purgable == VM_PURGABLE_DENY) {
1222 owner = TASK_NULL;
1223 } else {
1224 owner = object->vo_purgeable_owner;
1225 }
1226 if (owner &&
1227 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1228 VM_PAGE_WIRED(mem))) {
1229 /* more non-volatile bytes */
1230 ledger_credit(owner->ledger,
1231 task_ledgers.purgeable_nonvolatile,
1232 PAGE_SIZE);
1233 /* more footprint */
1234 ledger_credit(owner->ledger,
1235 task_ledgers.phys_footprint,
1236 PAGE_SIZE);
1237
1238 } else if (owner &&
1239 (object->purgable == VM_PURGABLE_VOLATILE ||
1240 object->purgable == VM_PURGABLE_EMPTY)) {
1241 assert(! VM_PAGE_WIRED(mem));
1242 /* more volatile bytes */
1243 ledger_credit(owner->ledger,
1244 task_ledgers.purgeable_volatile,
1245 PAGE_SIZE);
1246 }
1247
1248 if (object->purgable == VM_PURGABLE_VOLATILE) {
1249 if (VM_PAGE_WIRED(mem)) {
1250 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1251 } else {
1252 OSAddAtomic(+1, &vm_page_purgeable_count);
1253 }
1254 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1255 mem->throttled) {
1256 /*
1257 * This page belongs to a purged VM object but hasn't
1258 * been purged (because it was "busy").
1259 * It's in the "throttled" queue and hence not
1260 * visible to vm_pageout_scan(). Move it to a pageable
1261 * queue, so that it can eventually be reclaimed, instead
1262 * of lingering in the "empty" object.
1263 */
1264 if (queues_lock_held == FALSE)
1265 vm_page_lockspin_queues();
1266 vm_page_deactivate(mem);
1267 if (queues_lock_held == FALSE)
1268 vm_page_unlock_queues();
1269 }
1270
1271 #if VM_OBJECT_TRACKING_OP_MODIFIED
1272 if (vm_object_tracking_inited &&
1273 object->internal &&
1274 object->resident_page_count == 0 &&
1275 object->pager == NULL &&
1276 object->shadow != NULL &&
1277 object->shadow->copy == object) {
1278 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1279 int numsaved = 0;
1280
1281 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1282 btlog_add_entry(vm_object_tracking_btlog,
1283 object,
1284 VM_OBJECT_TRACKING_OP_MODIFIED,
1285 bt,
1286 numsaved);
1287 }
1288 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1289 }
1290
1291 /*
1292 * vm_page_replace:
1293 *
1294 * Exactly like vm_page_insert, except that we first
1295 * remove any existing page at the given offset in object.
1296 *
1297 * The object must be locked.
1298 */
1299 void
1300 vm_page_replace(
1301 register vm_page_t mem,
1302 register vm_object_t object,
1303 register vm_object_offset_t offset)
1304 {
1305 vm_page_bucket_t *bucket;
1306 vm_page_t found_m = VM_PAGE_NULL;
1307 lck_spin_t *bucket_lock;
1308 int hash_id;
1309
1310 #if 0
1311 /*
1312 * we don't hold the page queue lock
1313 * so this check isn't safe to make
1314 */
1315 VM_PAGE_CHECK(mem);
1316 #endif
1317 vm_object_lock_assert_exclusive(object);
1318 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1319 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1320 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1321 "already in (obj=%p,off=0x%llx)",
1322 mem, object, offset, mem->object, mem->offset);
1323 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1324 #endif
1325 /*
1326 * Record the object/offset pair in this page
1327 */
1328
1329 mem->object = object;
1330 mem->offset = offset;
1331
1332 /*
1333 * Insert it into the object_object/offset hash table,
1334 * replacing any page that might have been there.
1335 */
1336
1337 hash_id = vm_page_hash(object, offset);
1338 bucket = &vm_page_buckets[hash_id];
1339 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1340
1341 lck_spin_lock(bucket_lock);
1342
1343 if (bucket->page_list) {
1344 vm_page_packed_t *mp = &bucket->page_list;
1345 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1346
1347 do {
1348 if (m->object == object && m->offset == offset) {
1349 /*
1350 * Remove old page from hash list
1351 */
1352 *mp = m->next_m;
1353 m->hashed = FALSE;
1354
1355 found_m = m;
1356 break;
1357 }
1358 mp = &m->next_m;
1359 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1360
1361 mem->next_m = bucket->page_list;
1362 } else {
1363 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1364 }
1365 /*
1366 * insert new page at head of hash list
1367 */
1368 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1369 mem->hashed = TRUE;
1370
1371 lck_spin_unlock(bucket_lock);
1372
1373 if (found_m) {
1374 /*
1375 * there was already a page at the specified
1376 * offset for this object... remove it from
1377 * the object and free it back to the free list
1378 */
1379 vm_page_free_unlocked(found_m, FALSE);
1380 }
1381 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1382 }
1383
1384 /*
1385 * vm_page_remove: [ internal use only ]
1386 *
1387 * Removes the given mem entry from the object/offset-page
1388 * table and the object page list.
1389 *
1390 * The object must be locked.
1391 */
1392
1393 void
1394 vm_page_remove(
1395 vm_page_t mem,
1396 boolean_t remove_from_hash)
1397 {
1398 vm_page_bucket_t *bucket;
1399 vm_page_t this;
1400 lck_spin_t *bucket_lock;
1401 int hash_id;
1402 task_t owner;
1403
1404 XPR(XPR_VM_PAGE,
1405 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1406 mem->object, mem->offset,
1407 mem, 0,0);
1408
1409 vm_object_lock_assert_exclusive(mem->object);
1410 assert(mem->tabled);
1411 assert(!mem->cleaning);
1412 assert(!mem->laundry);
1413 #if 0
1414 /*
1415 * we don't hold the page queue lock
1416 * so this check isn't safe to make
1417 */
1418 VM_PAGE_CHECK(mem);
1419 #endif
1420 if (remove_from_hash == TRUE) {
1421 /*
1422 * Remove from the object_object/offset hash table
1423 */
1424 hash_id = vm_page_hash(mem->object, mem->offset);
1425 bucket = &vm_page_buckets[hash_id];
1426 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1427
1428 lck_spin_lock(bucket_lock);
1429
1430 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1431 /* optimize for common case */
1432
1433 bucket->page_list = mem->next_m;
1434 } else {
1435 vm_page_packed_t *prev;
1436
1437 for (prev = &this->next_m;
1438 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1439 prev = &this->next_m)
1440 continue;
1441 *prev = this->next_m;
1442 }
1443 #if MACH_PAGE_HASH_STATS
1444 bucket->cur_count--;
1445 #endif /* MACH_PAGE_HASH_STATS */
1446 mem->hashed = FALSE;
1447 lck_spin_unlock(bucket_lock);
1448 }
1449 /*
1450 * Now remove from the object's list of backed pages.
1451 */
1452
1453 VM_PAGE_REMOVE(mem);
1454
1455 /*
1456 * And show that the object has one fewer resident
1457 * page.
1458 */
1459
1460 assert(mem->object->resident_page_count > 0);
1461 mem->object->resident_page_count--;
1462
1463 if (mem->object->internal) {
1464 #if DEBUG
1465 assert(vm_page_internal_count);
1466 #endif /* DEBUG */
1467
1468 OSAddAtomic(-1, &vm_page_internal_count);
1469 } else {
1470 assert(vm_page_external_count);
1471 OSAddAtomic(-1, &vm_page_external_count);
1472
1473 if (mem->xpmapped) {
1474 assert(vm_page_xpmapped_external_count);
1475 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1476 }
1477 }
1478 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1479 if (mem->object->resident_page_count == 0)
1480 vm_object_cache_remove(mem->object);
1481 }
1482
1483 if (VM_PAGE_WIRED(mem)) {
1484 assert(mem->object->wired_page_count > 0);
1485 mem->object->wired_page_count--;
1486 }
1487 assert(mem->object->resident_page_count >=
1488 mem->object->wired_page_count);
1489 if (mem->reusable) {
1490 assert(mem->object->reusable_page_count > 0);
1491 mem->object->reusable_page_count--;
1492 assert(mem->object->reusable_page_count <=
1493 mem->object->resident_page_count);
1494 mem->reusable = FALSE;
1495 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1496 vm_page_stats_reusable.reused_remove++;
1497 } else if (mem->object->all_reusable) {
1498 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1499 vm_page_stats_reusable.reused_remove++;
1500 }
1501
1502 if (mem->object->purgable == VM_PURGABLE_DENY) {
1503 owner = TASK_NULL;
1504 } else {
1505 owner = mem->object->vo_purgeable_owner;
1506 }
1507 if (owner &&
1508 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1509 VM_PAGE_WIRED(mem))) {
1510 /* less non-volatile bytes */
1511 ledger_debit(owner->ledger,
1512 task_ledgers.purgeable_nonvolatile,
1513 PAGE_SIZE);
1514 /* less footprint */
1515 ledger_debit(owner->ledger,
1516 task_ledgers.phys_footprint,
1517 PAGE_SIZE);
1518 } else if (owner &&
1519 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1520 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1521 assert(! VM_PAGE_WIRED(mem));
1522 /* less volatile bytes */
1523 ledger_debit(owner->ledger,
1524 task_ledgers.purgeable_volatile,
1525 PAGE_SIZE);
1526 }
1527 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1528 if (VM_PAGE_WIRED(mem)) {
1529 assert(vm_page_purgeable_wired_count > 0);
1530 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1531 } else {
1532 assert(vm_page_purgeable_count > 0);
1533 OSAddAtomic(-1, &vm_page_purgeable_count);
1534 }
1535 }
1536 if (mem->object->set_cache_attr == TRUE)
1537 pmap_set_cache_attributes(mem->phys_page, 0);
1538
1539 mem->tabled = FALSE;
1540 mem->object = VM_OBJECT_NULL;
1541 mem->offset = (vm_object_offset_t) -1;
1542 }
1543
1544
1545 /*
1546 * vm_page_lookup:
1547 *
1548 * Returns the page associated with the object/offset
1549 * pair specified; if none is found, VM_PAGE_NULL is returned.
1550 *
1551 * The object must be locked. No side effects.
1552 */
1553
1554 unsigned long vm_page_lookup_hint = 0;
1555 unsigned long vm_page_lookup_hint_next = 0;
1556 unsigned long vm_page_lookup_hint_prev = 0;
1557 unsigned long vm_page_lookup_hint_miss = 0;
1558 unsigned long vm_page_lookup_bucket_NULL = 0;
1559 unsigned long vm_page_lookup_miss = 0;
1560
1561
1562 vm_page_t
1563 vm_page_lookup(
1564 vm_object_t object,
1565 vm_object_offset_t offset)
1566 {
1567 vm_page_t mem;
1568 vm_page_bucket_t *bucket;
1569 queue_entry_t qe;
1570 lck_spin_t *bucket_lock;
1571 int hash_id;
1572
1573 vm_object_lock_assert_held(object);
1574 mem = object->memq_hint;
1575
1576 if (mem != VM_PAGE_NULL) {
1577 assert(mem->object == object);
1578
1579 if (mem->offset == offset) {
1580 vm_page_lookup_hint++;
1581 return mem;
1582 }
1583 qe = queue_next(&mem->listq);
1584
1585 if (! queue_end(&object->memq, qe)) {
1586 vm_page_t next_page;
1587
1588 next_page = (vm_page_t) qe;
1589 assert(next_page->object == object);
1590
1591 if (next_page->offset == offset) {
1592 vm_page_lookup_hint_next++;
1593 object->memq_hint = next_page; /* new hint */
1594 return next_page;
1595 }
1596 }
1597 qe = queue_prev(&mem->listq);
1598
1599 if (! queue_end(&object->memq, qe)) {
1600 vm_page_t prev_page;
1601
1602 prev_page = (vm_page_t) qe;
1603 assert(prev_page->object == object);
1604
1605 if (prev_page->offset == offset) {
1606 vm_page_lookup_hint_prev++;
1607 object->memq_hint = prev_page; /* new hint */
1608 return prev_page;
1609 }
1610 }
1611 }
1612 /*
1613 * Search the hash table for this object/offset pair
1614 */
1615 hash_id = vm_page_hash(object, offset);
1616 bucket = &vm_page_buckets[hash_id];
1617
1618 /*
1619 * since we hold the object lock, we are guaranteed that no
1620 * new pages can be inserted into this object... this in turn
1621 * guarantees that the page we're looking for can't exist
1622 * if the bucket it hashes to is currently NULL even when looked
1623 * at outside the scope of the hash bucket lock... this is a
1624 * really cheap optimization to avoid taking the lock
1625 */
1626 if (!bucket->page_list) {
1627 vm_page_lookup_bucket_NULL++;
1628
1629 return (VM_PAGE_NULL);
1630 }
1631 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1632
1633 lck_spin_lock(bucket_lock);
1634
1635 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1636 #if 0
1637 /*
1638 * we don't hold the page queue lock
1639 * so this check isn't safe to make
1640 */
1641 VM_PAGE_CHECK(mem);
1642 #endif
1643 if ((mem->object == object) && (mem->offset == offset))
1644 break;
1645 }
1646 lck_spin_unlock(bucket_lock);
1647
1648 if (mem != VM_PAGE_NULL) {
1649 if (object->memq_hint != VM_PAGE_NULL) {
1650 vm_page_lookup_hint_miss++;
1651 }
1652 assert(mem->object == object);
1653 object->memq_hint = mem;
1654 } else
1655 vm_page_lookup_miss++;
1656
1657 return(mem);
1658 }
1659
1660
1661 /*
1662 * vm_page_rename:
1663 *
1664 * Move the given memory entry from its
1665 * current object to the specified target object/offset.
1666 *
1667 * The object must be locked.
1668 */
1669 void
1670 vm_page_rename(
1671 register vm_page_t mem,
1672 register vm_object_t new_object,
1673 vm_object_offset_t new_offset,
1674 boolean_t encrypted_ok)
1675 {
1676 boolean_t internal_to_external, external_to_internal;
1677
1678 assert(mem->object != new_object);
1679
1680 /*
1681 * ENCRYPTED SWAP:
1682 * The encryption key is based on the page's memory object
1683 * (aka "pager") and paging offset. Moving the page to
1684 * another VM object changes its "pager" and "paging_offset"
1685 * so it has to be decrypted first, or we would lose the key.
1686 *
1687 * One exception is VM object collapsing, where we transfer pages
1688 * from one backing object to its parent object. This operation also
1689 * transfers the paging information, so the <pager,paging_offset> info
1690 * should remain consistent. The caller (vm_object_do_collapse())
1691 * sets "encrypted_ok" in this case.
1692 */
1693 if (!encrypted_ok && mem->encrypted) {
1694 panic("vm_page_rename: page %p is encrypted\n", mem);
1695 }
1696
1697 XPR(XPR_VM_PAGE,
1698 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1699 new_object, new_offset,
1700 mem, 0,0);
1701
1702 /*
1703 * Changes to mem->object require the page lock because
1704 * the pageout daemon uses that lock to get the object.
1705 */
1706 vm_page_lockspin_queues();
1707
1708 internal_to_external = FALSE;
1709 external_to_internal = FALSE;
1710
1711 if (mem->local) {
1712 /*
1713 * it's much easier to get the vm_page_pageable_xxx accounting correct
1714 * if we first move the page to the active queue... it's going to end
1715 * up there anyway, and we don't do vm_page_rename's frequently enough
1716 * for this to matter.
1717 */
1718 VM_PAGE_QUEUES_REMOVE(mem);
1719 vm_page_activate(mem);
1720 }
1721 if (mem->active || mem->inactive || mem->speculative) {
1722 if (mem->object->internal && !new_object->internal) {
1723 internal_to_external = TRUE;
1724 }
1725 if (!mem->object->internal && new_object->internal) {
1726 external_to_internal = TRUE;
1727 }
1728 }
1729
1730 vm_page_remove(mem, TRUE);
1731 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1732
1733 if (internal_to_external) {
1734 vm_page_pageable_internal_count--;
1735 vm_page_pageable_external_count++;
1736 } else if (external_to_internal) {
1737 vm_page_pageable_external_count--;
1738 vm_page_pageable_internal_count++;
1739 }
1740
1741 vm_page_unlock_queues();
1742 }
1743
1744 /*
1745 * vm_page_init:
1746 *
1747 * Initialize the fields in a new page.
1748 * This takes a structure with random values and initializes it
1749 * so that it can be given to vm_page_release or vm_page_insert.
1750 */
1751 void
1752 vm_page_init(
1753 vm_page_t mem,
1754 ppnum_t phys_page,
1755 boolean_t lopage)
1756 {
1757 assert(phys_page);
1758
1759 #if DEBUG
1760 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1761 if (!(pmap_valid_page(phys_page))) {
1762 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1763 }
1764 }
1765 #endif
1766 *mem = vm_page_template;
1767 mem->phys_page = phys_page;
1768 #if 0
1769 /*
1770 * we're leaving this turned off for now... currently pages
1771 * come off the free list and are either immediately dirtied/referenced
1772 * due to zero-fill or COW faults, or are used to read or write files...
1773 * in the file I/O case, the UPL mechanism takes care of clearing
1774 * the state of the HW ref/mod bits in a somewhat fragile way.
1775 * Since we may change the way this works in the future (to toughen it up),
1776 * I'm leaving this as a reminder of where these bits could get cleared
1777 */
1778
1779 /*
1780 * make sure both the h/w referenced and modified bits are
1781 * clear at this point... we are especially dependent on
1782 * not finding a 'stale' h/w modified in a number of spots
1783 * once this page goes back into use
1784 */
1785 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1786 #endif
1787 mem->lopage = lopage;
1788 }
1789
1790 /*
1791 * vm_page_grab_fictitious:
1792 *
1793 * Remove a fictitious page from the free list.
1794 * Returns VM_PAGE_NULL if there are no free pages.
1795 */
1796 int c_vm_page_grab_fictitious = 0;
1797 int c_vm_page_grab_fictitious_failed = 0;
1798 int c_vm_page_release_fictitious = 0;
1799 int c_vm_page_more_fictitious = 0;
1800
1801 vm_page_t
1802 vm_page_grab_fictitious_common(
1803 ppnum_t phys_addr)
1804 {
1805 vm_page_t m;
1806
1807 if ((m = (vm_page_t)zget(vm_page_zone))) {
1808
1809 vm_page_init(m, phys_addr, FALSE);
1810 m->fictitious = TRUE;
1811
1812 c_vm_page_grab_fictitious++;
1813 } else
1814 c_vm_page_grab_fictitious_failed++;
1815
1816 return m;
1817 }
1818
1819 vm_page_t
1820 vm_page_grab_fictitious(void)
1821 {
1822 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1823 }
1824
1825 vm_page_t
1826 vm_page_grab_guard(void)
1827 {
1828 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1829 }
1830
1831
1832 /*
1833 * vm_page_release_fictitious:
1834 *
1835 * Release a fictitious page to the zone pool
1836 */
1837 void
1838 vm_page_release_fictitious(
1839 vm_page_t m)
1840 {
1841 assert(!m->free);
1842 assert(m->fictitious);
1843 assert(m->phys_page == vm_page_fictitious_addr ||
1844 m->phys_page == vm_page_guard_addr);
1845
1846 c_vm_page_release_fictitious++;
1847
1848 zfree(vm_page_zone, m);
1849 }
1850
1851 /*
1852 * vm_page_more_fictitious:
1853 *
1854 * Add more fictitious pages to the zone.
1855 * Allowed to block. This routine is way intimate
1856 * with the zones code, for several reasons:
1857 * 1. we need to carve some page structures out of physical
1858 * memory before zones work, so they _cannot_ come from
1859 * the zone_map.
1860 * 2. the zone needs to be collectable in order to prevent
1861 * growth without bound. These structures are used by
1862 * the device pager (by the hundreds and thousands), as
1863 * private pages for pageout, and as blocking pages for
1864 * pagein. Temporary bursts in demand should not result in
1865 * permanent allocation of a resource.
1866 * 3. To smooth allocation humps, we allocate single pages
1867 * with kernel_memory_allocate(), and cram them into the
1868 * zone.
1869 */
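/*
 * NOTE (illustrative): a typical caller is expected to loop, e.g.
 *
 *	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
 *		vm_page_more_fictitious();
 *
 * which is why the "> 5" check below must cover the largest number
 * of fictitious pages any single caller can already be holding.
 */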
1870
1871 void vm_page_more_fictitious(void)
1872 {
1873 vm_offset_t addr;
1874 kern_return_t retval;
1875
1876 c_vm_page_more_fictitious++;
1877
1878 /*
1879 * Allocate a single page from the zone_map. Do not wait if no physical
1880 * pages are immediately available, and do not zero the space. We need
1881 * our own blocking lock here to prevent multiple simultaneous
1882 * requests from piling up on the zone_map lock. Exactly
1883 * one (of our) threads should be potentially waiting on the map lock.
1884 * If the winner is not vm-privileged, then the page allocation will fail,
1885 * and it will temporarily block below in vm_page_wait().
1886 */
1887 lck_mtx_lock(&vm_page_alloc_lock);
1888 /*
1889 * If another thread allocated space, just bail out now.
1890 */
1891 if (zone_free_count(vm_page_zone) > 5) {
1892 /*
1893 * The number "5" is a small number that is larger than the
1894 * number of fictitious pages that any single caller will
1895 * attempt to allocate. Otherwise, a thread will attempt to
1896 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1897 * release all of the resources and locks already acquired,
1898 * and then call this routine. This routine finds the pages
1899 * that the caller released, so fails to allocate new space.
1900 * The process repeats infinitely. The largest known number
1901 * of fictitious pages required in this manner is 2. 5 is
1902 * simply a somewhat larger number.
1903 */
1904 lck_mtx_unlock(&vm_page_alloc_lock);
1905 return;
1906 }
1907
1908 retval = kernel_memory_allocate(zone_map,
1909 &addr, PAGE_SIZE, VM_PROT_ALL,
1910 KMA_KOBJECT|KMA_NOPAGEWAIT);
1911 if (retval != KERN_SUCCESS) {
1912 /*
1913 * No page was available. Drop the
1914 * lock to give another thread a chance at it, and
1915 * wait for the pageout daemon to make progress.
1916 */
1917 lck_mtx_unlock(&vm_page_alloc_lock);
1918 vm_page_wait(THREAD_UNINT);
1919 return;
1920 }
1921
1922 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1923 OSAddAtomic64(1, &(vm_page_zone->page_count));
1924
1925 zcram(vm_page_zone, addr, PAGE_SIZE);
1926
1927 lck_mtx_unlock(&vm_page_alloc_lock);
1928 }
1929
1930
1931 /*
1932 * vm_pool_low():
1933 *
1934 * Return true if it is not likely that a non-vm_privileged thread
1935 * can get memory without blocking. Advisory only, since the
1936 * situation may change under us.
1937 */
1938 int
1939 vm_pool_low(void)
1940 {
1941 /* No locking, at worst we will fib. */
1942 return( vm_page_free_count <= vm_page_free_reserved );
1943 }
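/*
 * NOTE: since vm_pool_low() is advisory only, callers should treat
 * it strictly as a hint (e.g. fall back to a smaller or non-blocking
 * allocation) because the free count can change as soon as we return.
 */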
1944
1945
1946
1947 /*
1948 * this is an interface to support bring-up of drivers
1949 * on platforms with physical memory > 4G...
1950 */
1951 int vm_himemory_mode = 2;
1952
1953
1954 /*
1955 * this interface exists to support hardware controllers
1956 * incapable of generating DMAs with more than 32 bits
1957 * of address on platforms with physical memory > 4G...
1958 */
1959 unsigned int vm_lopages_allocated_q = 0;
1960 unsigned int vm_lopages_allocated_cpm_success = 0;
1961 unsigned int vm_lopages_allocated_cpm_failed = 0;
1962 queue_head_t vm_lopage_queue_free;
1963
1964 vm_page_t
1965 vm_page_grablo(void)
1966 {
1967 vm_page_t mem;
1968
1969 if (vm_lopage_needed == FALSE)
1970 return (vm_page_grab());
1971
1972 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1973
1974 if ( !queue_empty(&vm_lopage_queue_free)) {
1975 queue_remove_first(&vm_lopage_queue_free,
1976 mem,
1977 vm_page_t,
1978 pageq);
1979 assert(vm_lopage_free_count);
1980
1981 vm_lopage_free_count--;
1982 vm_lopages_allocated_q++;
1983
1984 if (vm_lopage_free_count < vm_lopage_lowater)
1985 vm_lopage_refill = TRUE;
1986
1987 lck_mtx_unlock(&vm_page_queue_free_lock);
1988 } else {
1989 lck_mtx_unlock(&vm_page_queue_free_lock);
1990
1991 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1992
1993 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1994 vm_lopages_allocated_cpm_failed++;
1995 lck_mtx_unlock(&vm_page_queue_free_lock);
1996
1997 return (VM_PAGE_NULL);
1998 }
1999 mem->busy = TRUE;
2000
2001 vm_page_lockspin_queues();
2002
2003 mem->gobbled = FALSE;
2004 vm_page_gobble_count--;
2005 vm_page_wire_count--;
2006
2007 vm_lopages_allocated_cpm_success++;
2008 vm_page_unlock_queues();
2009 }
2010 assert(mem->busy);
2011 assert(!mem->free);
2012 assert(!mem->pmapped);
2013 assert(!mem->wpmapped);
2014 assert(!pmap_is_noencrypt(mem->phys_page));
2015
2016 mem->pageq.next = NULL;
2017 mem->pageq.prev = NULL;
2018
2019 return (mem);
2020 }
2021
2022
2023 /*
2024 * vm_page_grab:
2025 *
2026 * first try to grab a page from the per-cpu free list...
2027 * this must be done while pre-emption is disabled... if
2028 * a page is available, we're done...
2029 * if no page is available, grab the vm_page_queue_free_lock
2030 * and see if current number of free pages would allow us
2031 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2032 * if there are pages available, disable preemption and
2033 * recheck the state of the per-cpu free list... we could
2034 * have been preempted and moved to a different cpu, or
2035 * some other thread could have re-filled it... if still
2036 * empty, figure out how many pages we can steal from the
2037 * global free queue and move to the per-cpu queue...
2038 * return 1 of these pages when done... only wakeup the
2039 * pageout_scan thread if we moved pages from the global
2040 * list... no need for the wakeup if we've satisfied the
2041 * request from the per-cpu queue.
2042 */
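/*
 * NOTE: the global free list is really an array of queues,
 * vm_page_queue_free[], indexed by page "color" (phys_page &
 * vm_color_mask), i.e. cache coloring.  Pages are handed out
 * round-robin across the colors (see the 'color' / 'start_color'
 * handling below), which spreads successive allocations across
 * the colors.
 */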
2043
2044
2045 vm_page_t
2046 vm_page_grab( void )
2047 {
2048 vm_page_t mem;
2049
2050
2051 disable_preemption();
2052
2053 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2054 return_page_from_cpu_list:
2055 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2056 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2057
2058 enable_preemption();
2059 mem->pageq.next = NULL;
2060
2061 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2062 assert(mem->tabled == FALSE);
2063 assert(mem->object == VM_OBJECT_NULL);
2064 assert(!mem->laundry);
2065 assert(!mem->free);
2066 assert(pmap_verify_free(mem->phys_page));
2067 assert(mem->busy);
2068 assert(!mem->encrypted);
2069 assert(!mem->pmapped);
2070 assert(!mem->wpmapped);
2071 assert(!mem->active);
2072 assert(!mem->inactive);
2073 assert(!mem->throttled);
2074 assert(!mem->speculative);
2075 assert(!pmap_is_noencrypt(mem->phys_page));
2076
2077 return mem;
2078 }
2079 enable_preemption();
2080
2081
2082 /*
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2085 */
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count);
2090 }
2091 #endif
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count);
2096 }
2097 #endif
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2099
2100 /*
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2103 */
2104 if ((vm_page_free_count < vm_page_free_reserved) &&
2105 !(current_thread()->options & TH_OPT_VMPRIV)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock);
2107 mem = VM_PAGE_NULL;
2108 }
2109 else {
2110 vm_page_t head;
2111 vm_page_t tail;
2112 unsigned int pages_to_steal;
2113 unsigned int color;
2114
2115 while ( vm_page_free_count == 0 ) {
2116
2117 lck_mtx_unlock(&vm_page_queue_free_lock);
2118 /*
2119 * must be a privileged thread to be
2120 * in this state since a non-privileged
2121 * thread would have bailed if we were
2122 * under the vm_page_free_reserved mark
2123 */
2124 VM_PAGE_WAIT();
2125 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2126 }
2127
2128 disable_preemption();
2129
2130 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2131 lck_mtx_unlock(&vm_page_queue_free_lock);
2132
2133 /*
2134 * we got preempted and moved to another processor
2135 * or we got preempted and someone else ran and filled the cache
2136 */
2137 goto return_page_from_cpu_list;
2138 }
2139 if (vm_page_free_count <= vm_page_free_reserved)
2140 pages_to_steal = 1;
2141 else {
2142 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2143 pages_to_steal = vm_free_magazine_refill_limit;
2144 else
2145 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2146 }
2147 color = PROCESSOR_DATA(current_processor(), start_color);
2148 head = tail = NULL;
2149
2150 vm_page_free_count -= pages_to_steal;
2151
2152 while (pages_to_steal--) {
2153
2154 while (queue_empty(&vm_page_queue_free[color]))
2155 color = (color + 1) & vm_color_mask;
2156
2157 queue_remove_first(&vm_page_queue_free[color],
2158 mem,
2159 vm_page_t,
2160 pageq);
2161 mem->pageq.next = NULL;
2162 mem->pageq.prev = NULL;
2163
2164 assert(!mem->active);
2165 assert(!mem->inactive);
2166 assert(!mem->throttled);
2167 assert(!mem->speculative);
2168
2169 color = (color + 1) & vm_color_mask;
2170
2171 if (head == NULL)
2172 head = mem;
2173 else
2174 tail->pageq.next = (queue_t)mem;
2175 tail = mem;
2176
2177 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2178 assert(mem->tabled == FALSE);
2179 assert(mem->object == VM_OBJECT_NULL);
2180 assert(!mem->laundry);
2181 assert(mem->free);
2182 mem->free = FALSE;
2183
2184 assert(pmap_verify_free(mem->phys_page));
2185 assert(mem->busy);
2186 assert(!mem->free);
2187 assert(!mem->encrypted);
2188 assert(!mem->pmapped);
2189 assert(!mem->wpmapped);
2190 assert(!pmap_is_noencrypt(mem->phys_page));
2191 }
2192 lck_mtx_unlock(&vm_page_queue_free_lock);
2193
2194 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2195 PROCESSOR_DATA(current_processor(), start_color) = color;
2196
2197 /*
2198 * satisfy this request
2199 */
2200 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2201 mem = head;
2202 mem->pageq.next = NULL;
2203
2204 enable_preemption();
2205 }
2206 /*
2207 * Decide if we should poke the pageout daemon.
2208 * We do this if the free count is less than the low
2209 * water mark, or if the free count is less than the high
2210 * water mark (but above the low water mark) and the inactive
2211 * count is less than its target.
2212 *
2213 * We don't have the counts locked ... if they change a little,
2214 * it doesn't really matter.
2215 */
2216 if ((vm_page_free_count < vm_page_free_min) ||
2217 ((vm_page_free_count < vm_page_free_target) &&
2218 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2219 thread_wakeup((event_t) &vm_page_free_wanted);
2220
2221 VM_CHECK_MEMORYSTATUS;
2222
2223 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2224
2225 return mem;
2226 }
2227
2228 /*
2229 * vm_page_release:
2230 *
2231 * Return a page to the free list.
2232 */
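/*
 * NOTE: as the asserts below spell out, the page must be busy,
 * must not be private or fictitious, must not already be free,
 * and must not be on any queue or in any object.  The free-queue
 * lock is taken here, so the caller must not already hold it.
 */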
2233
2234 void
2235 vm_page_release(
2236 register vm_page_t mem)
2237 {
2238 unsigned int color;
2239 int need_wakeup = 0;
2240 int need_priv_wakeup = 0;
2241
2242
2243 assert(!mem->private && !mem->fictitious);
2244 if (vm_page_free_verify) {
2245 assert(pmap_verify_free(mem->phys_page));
2246 }
2247 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2248
2249 pmap_clear_noencrypt(mem->phys_page);
2250
2251 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2252 #if DEBUG
2253 if (mem->free)
2254 panic("vm_page_release");
2255 #endif
2256
2257 assert(mem->busy);
2258 assert(!mem->laundry);
2259 assert(mem->object == VM_OBJECT_NULL);
2260 assert(mem->pageq.next == NULL &&
2261 mem->pageq.prev == NULL);
2262 assert(mem->listq.next == NULL &&
2263 mem->listq.prev == NULL);
2264
2265 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2266 vm_lopage_free_count < vm_lopage_free_limit &&
2267 mem->phys_page < max_valid_low_ppnum) {
2268 /*
2269 * this exists to support hardware controllers
2270 * incapable of generating DMAs with more than 32 bits
2271 * of address on platforms with physical memory > 4G...
2272 */
2273 queue_enter_first(&vm_lopage_queue_free,
2274 mem,
2275 vm_page_t,
2276 pageq);
2277 vm_lopage_free_count++;
2278
2279 if (vm_lopage_free_count >= vm_lopage_free_limit)
2280 vm_lopage_refill = FALSE;
2281
2282 mem->lopage = TRUE;
2283 } else {
2284 mem->lopage = FALSE;
2285 mem->free = TRUE;
2286
2287 color = mem->phys_page & vm_color_mask;
2288 queue_enter_first(&vm_page_queue_free[color],
2289 mem,
2290 vm_page_t,
2291 pageq);
2292 vm_page_free_count++;
2293 /*
2294 * Check if we should wake up someone waiting for a page.
2295 * But don't bother waking them unless they can allocate.
2296 *
2297 * We wakeup only one thread, to prevent starvation.
2298 * Because the scheduling system handles wait queues FIFO,
2299 * if we wake up all waiting threads, one greedy thread
2300 * can starve multiple well-behaved threads. When the threads
2301 * all wake up, the greedy thread runs first, grabs the page,
2302 * and waits for another page. It will be the first to run
2303 * when the next page is freed.
2304 *
2305 * However, there is a slight danger here.
2306 * The thread we wake might not use the free page.
2307 * Then the other threads could wait indefinitely
2308 * while the page goes unused. To forestall this,
2309 * the pageout daemon will keep making free pages
2310 * as long as vm_page_free_wanted is non-zero.
2311 */
2312
2313 assert(vm_page_free_count > 0);
2314 if (vm_page_free_wanted_privileged > 0) {
2315 vm_page_free_wanted_privileged--;
2316 need_priv_wakeup = 1;
2317 } else if (vm_page_free_wanted > 0 &&
2318 vm_page_free_count > vm_page_free_reserved) {
2319 vm_page_free_wanted--;
2320 need_wakeup = 1;
2321 }
2322 }
2323 lck_mtx_unlock(&vm_page_queue_free_lock);
2324
2325 if (need_priv_wakeup)
2326 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2327 else if (need_wakeup)
2328 thread_wakeup_one((event_t) &vm_page_free_count);
2329
2330 VM_CHECK_MEMORYSTATUS;
2331 }
2332
2333 /*
2334 * This version of vm_page_release() is used only at startup
2335 * when we are single-threaded and pages are being released
2336 * for the first time. Hence, no locking is done and unnecessary checks are skipped.
2337 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2338 */
2339 void
2340 vm_page_release_startup(
2341 register vm_page_t mem)
2342 {
2343 queue_t queue_free;
2344
2345 if (vm_lopage_free_count < vm_lopage_free_limit &&
2346 mem->phys_page < max_valid_low_ppnum) {
2347 mem->lopage = TRUE;
2348 vm_lopage_free_count++;
2349 queue_free = &vm_lopage_queue_free;
2350 } else {
2351 mem->lopage = FALSE;
2352 mem->free = TRUE;
2353 vm_page_free_count++;
2354 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2355 }
2356 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2357 }
2358
2359 /*
2360 * vm_page_wait:
2361 *
2362 * Wait for a page to become available.
2363 * If there are plenty of free pages, then we don't sleep.
2364 *
2365 * Returns:
2366 * TRUE: There may be another page, try again
2367 * FALSE: We were interrupted out of our wait, don't try again
2368 */
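/*
 * NOTE (illustrative): the TRUE/FALSE contract above is meant to
 * support a caller loop along the lines of
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(interruptible))
 *			break;		[interrupted, give up]
 *	}
 *
 * (compare the fallback path in vm_page_part_zero_fill() below).
 */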
2369
2370 boolean_t
2371 vm_page_wait(
2372 int interruptible )
2373 {
2374 /*
2375 * We can't use vm_page_free_reserved to make this
2376 * determination. Consider: some thread might
2377 * need to allocate two pages. The first allocation
2378 * succeeds, the second fails. After the first page is freed,
2379 * a call to vm_page_wait must really block.
2380 */
2381 kern_return_t wait_result;
2382 int need_wakeup = 0;
2383 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2384
2385 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2386
2387 if (is_privileged && vm_page_free_count) {
2388 lck_mtx_unlock(&vm_page_queue_free_lock);
2389 return TRUE;
2390 }
2391 if (vm_page_free_count < vm_page_free_target) {
2392
2393 if (is_privileged) {
2394 if (vm_page_free_wanted_privileged++ == 0)
2395 need_wakeup = 1;
2396 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2397 } else {
2398 if (vm_page_free_wanted++ == 0)
2399 need_wakeup = 1;
2400 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2401 }
2402 lck_mtx_unlock(&vm_page_queue_free_lock);
2403 counter(c_vm_page_wait_block++);
2404
2405 if (need_wakeup)
2406 thread_wakeup((event_t)&vm_page_free_wanted);
2407
2408 if (wait_result == THREAD_WAITING) {
2409 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2410 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2411 wait_result = thread_block(THREAD_CONTINUE_NULL);
2412 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2413 }
2414
2415 return(wait_result == THREAD_AWAKENED);
2416 } else {
2417 lck_mtx_unlock(&vm_page_queue_free_lock);
2418 return TRUE;
2419 }
2420 }
2421
2422 /*
2423 * vm_page_alloc:
2424 *
2425 * Allocate and return a memory cell associated
2426 * with this VM object/offset pair.
2427 *
2428 * Object must be locked.
2429 */
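/*
 * NOTE (illustrative): a typical caller holds the object lock
 * exclusively and, on a VM_PAGE_NULL return, drops it before
 * blocking for a free page, e.g.
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 */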
2430
2431 vm_page_t
2432 vm_page_alloc(
2433 vm_object_t object,
2434 vm_object_offset_t offset)
2435 {
2436 register vm_page_t mem;
2437
2438 vm_object_lock_assert_exclusive(object);
2439 mem = vm_page_grab();
2440 if (mem == VM_PAGE_NULL)
2441 return VM_PAGE_NULL;
2442
2443 vm_page_insert(mem, object, offset);
2444
2445 return(mem);
2446 }
2447
2448 vm_page_t
2449 vm_page_alloclo(
2450 vm_object_t object,
2451 vm_object_offset_t offset)
2452 {
2453 register vm_page_t mem;
2454
2455 vm_object_lock_assert_exclusive(object);
2456 mem = vm_page_grablo();
2457 if (mem == VM_PAGE_NULL)
2458 return VM_PAGE_NULL;
2459
2460 vm_page_insert(mem, object, offset);
2461
2462 return(mem);
2463 }
2464
2465
2466 /*
2467 * vm_page_alloc_guard:
2468 *
2469 * Allocate a fictitious page which will be used
2470 * as a guard page. The page will be inserted into
2471 * the object and returned to the caller.
2472 */
2473
2474 vm_page_t
2475 vm_page_alloc_guard(
2476 vm_object_t object,
2477 vm_object_offset_t offset)
2478 {
2479 register vm_page_t mem;
2480
2481 vm_object_lock_assert_exclusive(object);
2482 mem = vm_page_grab_guard();
2483 if (mem == VM_PAGE_NULL)
2484 return VM_PAGE_NULL;
2485
2486 vm_page_insert(mem, object, offset);
2487
2488 return(mem);
2489 }
2490
2491
2492 counter(unsigned int c_laundry_pages_freed = 0;)
2493
2494 /*
2495 * vm_page_free_prepare:
2496 *
2497 * Removes page from any queue it may be on
2498 * and disassociates it from its VM object.
2499 *
2500 * Object and page queues must be locked prior to entry.
2501 */
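/*
 * NOTE: the work is split into a "queues" half and an "object" half
 * so that callers which batch frees (vm_page_free_unlocked(),
 * vm_page_free_list()) can do each half while holding only the lock
 * that half actually needs.
 */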
2502 static void
2503 vm_page_free_prepare(
2504 vm_page_t mem)
2505 {
2506 vm_page_free_prepare_queues(mem);
2507 vm_page_free_prepare_object(mem, TRUE);
2508 }
2509
2510
2511 void
2512 vm_page_free_prepare_queues(
2513 vm_page_t mem)
2514 {
2515 VM_PAGE_CHECK(mem);
2516 assert(!mem->free);
2517 assert(!mem->cleaning);
2518
2519 #if MACH_ASSERT || DEBUG
2520 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2521 if (mem->free)
2522 panic("vm_page_free: freeing page on free list\n");
2523 #endif /* MACH_ASSERT || DEBUG */
2524 if (mem->object) {
2525 vm_object_lock_assert_exclusive(mem->object);
2526 }
2527 if (mem->laundry) {
2528 /*
2529 * We may have to free a page while it's being laundered
2530 * if we lost its pager (due to a forced unmount, for example).
2531 * We need to call vm_pageout_steal_laundry() before removing
2532 * the page from its VM object, so that we can remove it
2533 * from its pageout queue and adjust the laundry accounting
2534 */
2535 vm_pageout_steal_laundry(mem, TRUE);
2536 counter(++c_laundry_pages_freed);
2537 }
2538
2539 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2540
2541 if (VM_PAGE_WIRED(mem)) {
2542 if (mem->object) {
2543 assert(mem->object->wired_page_count > 0);
2544 mem->object->wired_page_count--;
2545 assert(mem->object->resident_page_count >=
2546 mem->object->wired_page_count);
2547
2548 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2549 OSAddAtomic(+1, &vm_page_purgeable_count);
2550 assert(vm_page_purgeable_wired_count > 0);
2551 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2552 }
2553 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2554 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2555 mem->object->vo_purgeable_owner != TASK_NULL) {
2556 task_t owner;
2557
2558 owner = mem->object->vo_purgeable_owner;
2559 /*
2560 * While wired, this page was accounted
2561 * as "non-volatile" but it should now
2562 * be accounted as "volatile".
2563 */
2564 /* one less "non-volatile"... */
2565 ledger_debit(owner->ledger,
2566 task_ledgers.purgeable_nonvolatile,
2567 PAGE_SIZE);
2568 /* ... and "phys_footprint" */
2569 ledger_debit(owner->ledger,
2570 task_ledgers.phys_footprint,
2571 PAGE_SIZE);
2572 /* one more "volatile" */
2573 ledger_credit(owner->ledger,
2574 task_ledgers.purgeable_volatile,
2575 PAGE_SIZE);
2576 }
2577 }
2578 if (!mem->private && !mem->fictitious)
2579 vm_page_wire_count--;
2580 mem->wire_count = 0;
2581 assert(!mem->gobbled);
2582 } else if (mem->gobbled) {
2583 if (!mem->private && !mem->fictitious)
2584 vm_page_wire_count--;
2585 vm_page_gobble_count--;
2586 }
2587 }
2588
2589
2590 void
2591 vm_page_free_prepare_object(
2592 vm_page_t mem,
2593 boolean_t remove_from_hash)
2594 {
2595 if (mem->tabled)
2596 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2597
2598 PAGE_WAKEUP(mem); /* clears wanted */
2599
2600 if (mem->private) {
2601 mem->private = FALSE;
2602 mem->fictitious = TRUE;
2603 mem->phys_page = vm_page_fictitious_addr;
2604 }
2605 if ( !mem->fictitious) {
2606 vm_page_init(mem, mem->phys_page, mem->lopage);
2607 }
2608 }
2609
2610
2611 /*
2612 * vm_page_free:
2613 *
2614 * Returns the given page to the free list,
2615 * disassociating it with any VM object.
2616 *
2617 * Object and page queues must be locked prior to entry.
2618 */
2619 void
2620 vm_page_free(
2621 vm_page_t mem)
2622 {
2623 vm_page_free_prepare(mem);
2624
2625 if (mem->fictitious) {
2626 vm_page_release_fictitious(mem);
2627 } else {
2628 vm_page_release(mem);
2629 }
2630 }
2631
2632
2633 void
2634 vm_page_free_unlocked(
2635 vm_page_t mem,
2636 boolean_t remove_from_hash)
2637 {
2638 vm_page_lockspin_queues();
2639 vm_page_free_prepare_queues(mem);
2640 vm_page_unlock_queues();
2641
2642 vm_page_free_prepare_object(mem, remove_from_hash);
2643
2644 if (mem->fictitious) {
2645 vm_page_release_fictitious(mem);
2646 } else {
2647 vm_page_release(mem);
2648 }
2649 }
2650
2651
2652 /*
2653 * Free a list of pages. The list can be up to several hundred pages,
2654 * as batched up by vm_pageout_scan().
2655 * The big win is not having to take the free list lock once
2656 * per page.
2657 */
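/*
 * NOTE: 'freeq' is a singly linked list chained through pageq.next.
 * The pages are expected to already be off the paging queues and
 * unwired (see the asserts below); 'prepare_object' says whether the
 * object half of the teardown still needs to be done here.
 */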
2658 void
2659 vm_page_free_list(
2660 vm_page_t freeq,
2661 boolean_t prepare_object)
2662 {
2663 vm_page_t mem;
2664 vm_page_t nxt;
2665 vm_page_t local_freeq;
2666 int pg_count;
2667
2668 while (freeq) {
2669
2670 pg_count = 0;
2671 local_freeq = VM_PAGE_NULL;
2672 mem = freeq;
2673
2674 /*
2675 * break up the processing into smaller chunks so
2676 * that we can 'pipeline' the pages onto the
2677 * free list w/o introducing too much
2678 * contention on the global free queue lock
2679 */
2680 while (mem && pg_count < 64) {
2681
2682 assert(!mem->inactive);
2683 assert(!mem->active);
2684 assert(!mem->throttled);
2685 assert(!mem->free);
2686 assert(!mem->speculative);
2687 assert(!VM_PAGE_WIRED(mem));
2688 assert(mem->pageq.prev == NULL);
2689
2690 nxt = (vm_page_t)(mem->pageq.next);
2691
2692 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2693 assert(pmap_verify_free(mem->phys_page));
2694 }
2695 if (prepare_object == TRUE)
2696 vm_page_free_prepare_object(mem, TRUE);
2697
2698 if (!mem->fictitious) {
2699 assert(mem->busy);
2700
2701 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2702 vm_lopage_free_count < vm_lopage_free_limit &&
2703 mem->phys_page < max_valid_low_ppnum) {
2704 mem->pageq.next = NULL;
2705 vm_page_release(mem);
2706 } else {
2707 /*
2708 * IMPORTANT: we can't set the page "free" here
2709 * because that would make the page eligible for
2710 * a physically-contiguous allocation (see
2711 * vm_page_find_contiguous()) right away (we don't
2712 * hold the vm_page_queue_free lock). That would
2713 * cause trouble because the page is not actually
2714 * in the free queue yet...
2715 */
2716 mem->pageq.next = (queue_entry_t)local_freeq;
2717 local_freeq = mem;
2718 pg_count++;
2719
2720 pmap_clear_noencrypt(mem->phys_page);
2721 }
2722 } else {
2723 assert(mem->phys_page == vm_page_fictitious_addr ||
2724 mem->phys_page == vm_page_guard_addr);
2725 vm_page_release_fictitious(mem);
2726 }
2727 mem = nxt;
2728 }
2729 freeq = mem;
2730
2731 if ( (mem = local_freeq) ) {
2732 unsigned int avail_free_count;
2733 unsigned int need_wakeup = 0;
2734 unsigned int need_priv_wakeup = 0;
2735
2736 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2737
2738 while (mem) {
2739 int color;
2740
2741 nxt = (vm_page_t)(mem->pageq.next);
2742
2743 assert(!mem->free);
2744 assert(mem->busy);
2745 mem->free = TRUE;
2746
2747 color = mem->phys_page & vm_color_mask;
2748 queue_enter_first(&vm_page_queue_free[color],
2749 mem,
2750 vm_page_t,
2751 pageq);
2752 mem = nxt;
2753 }
2754 vm_page_free_count += pg_count;
2755 avail_free_count = vm_page_free_count;
2756
2757 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2758
2759 if (avail_free_count < vm_page_free_wanted_privileged) {
2760 need_priv_wakeup = avail_free_count;
2761 vm_page_free_wanted_privileged -= avail_free_count;
2762 avail_free_count = 0;
2763 } else {
2764 need_priv_wakeup = vm_page_free_wanted_privileged;
2765 avail_free_count -= vm_page_free_wanted_privileged;
2766 vm_page_free_wanted_privileged = 0;
2767 }
2768 }
2769 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2770 unsigned int available_pages;
2771
2772 available_pages = avail_free_count - vm_page_free_reserved;
2773
2774 if (available_pages >= vm_page_free_wanted) {
2775 need_wakeup = vm_page_free_wanted;
2776 vm_page_free_wanted = 0;
2777 } else {
2778 need_wakeup = available_pages;
2779 vm_page_free_wanted -= available_pages;
2780 }
2781 }
2782 lck_mtx_unlock(&vm_page_queue_free_lock);
2783
2784 if (need_priv_wakeup != 0) {
2785 /*
2786 * There shouldn't be that many VM-privileged threads,
2787 * so let's wake them all up, even if we don't quite
2788 * have enough pages to satisfy them all.
2789 */
2790 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2791 }
2792 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2793 /*
2794 * We don't expect to have any more waiters
2795 * after this, so let's wake them all up at
2796 * once.
2797 */
2798 thread_wakeup((event_t) &vm_page_free_count);
2799 } else for (; need_wakeup != 0; need_wakeup--) {
2800 /*
2801 * Wake up one waiter per page we just released.
2802 */
2803 thread_wakeup_one((event_t) &vm_page_free_count);
2804 }
2805
2806 VM_CHECK_MEMORYSTATUS;
2807 }
2808 }
2809 }
2810
2811
2812 /*
2813 * vm_page_wire:
2814 *
2815 * Mark this page as wired down by yet
2816 * another map, removing it from paging queues
2817 * as necessary.
2818 *
2819 * The page's object and the page queues must be locked.
2820 */
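/*
 * NOTE: only the 0 -> 1 wire_count transition below removes the page
 * from the paging queues and updates the wired / purgeable / ledger
 * accounting; additional wirings of an already-wired page just bump
 * wire_count.
 */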
2821 void
2822 vm_page_wire(
2823 register vm_page_t mem)
2824 {
2825
2826 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2827
2828 VM_PAGE_CHECK(mem);
2829 if (mem->object) {
2830 vm_object_lock_assert_exclusive(mem->object);
2831 } else {
2832 /*
2833 * In theory, the page should be in an object before it
2834 * gets wired, since we need to hold the object lock
2835 * to update some fields in the page structure.
2836 * However, some code (i386 pmap, for example) might want
2837 * to wire a page before it gets inserted into an object.
2838 * That's somewhat OK, as long as nobody else can get to
2839 * that page and update it at the same time.
2840 */
2841 }
2842 #if DEBUG
2843 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2844 #endif
2845 if ( !VM_PAGE_WIRED(mem)) {
2846
2847 if (mem->pageout_queue) {
2848 mem->pageout = FALSE;
2849 vm_pageout_throttle_up(mem);
2850 }
2851 VM_PAGE_QUEUES_REMOVE(mem);
2852
2853 if (mem->object) {
2854 mem->object->wired_page_count++;
2855 assert(mem->object->resident_page_count >=
2856 mem->object->wired_page_count);
2857 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2858 assert(vm_page_purgeable_count > 0);
2859 OSAddAtomic(-1, &vm_page_purgeable_count);
2860 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2861 }
2862 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2863 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2864 mem->object->vo_purgeable_owner != TASK_NULL) {
2865 task_t owner;
2866
2867 owner = mem->object->vo_purgeable_owner;
2868 /* less volatile bytes */
2869 ledger_debit(owner->ledger,
2870 task_ledgers.purgeable_volatile,
2871 PAGE_SIZE);
2872 /* more not-quite-volatile bytes */
2873 ledger_credit(owner->ledger,
2874 task_ledgers.purgeable_nonvolatile,
2875 PAGE_SIZE);
2876 /* more footprint */
2877 ledger_credit(owner->ledger,
2878 task_ledgers.phys_footprint,
2879 PAGE_SIZE);
2880 }
2881 if (mem->object->all_reusable) {
2882 /*
2883 * Wired pages are not counted as "re-usable"
2884 * in "all_reusable" VM objects, so nothing
2885 * to do here.
2886 */
2887 } else if (mem->reusable) {
2888 /*
2889 * This page is not "re-usable" when it's
2890 * wired, so adjust its state and the
2891 * accounting.
2892 */
2893 vm_object_reuse_pages(mem->object,
2894 mem->offset,
2895 mem->offset+PAGE_SIZE_64,
2896 FALSE);
2897 }
2898 }
2899 assert(!mem->reusable);
2900
2901 if (!mem->private && !mem->fictitious && !mem->gobbled)
2902 vm_page_wire_count++;
2903 if (mem->gobbled)
2904 vm_page_gobble_count--;
2905 mem->gobbled = FALSE;
2906
2907 VM_CHECK_MEMORYSTATUS;
2908
2909 /*
2910 * ENCRYPTED SWAP:
2911 * The page could be encrypted, but
2912 * we don't have to decrypt it here
2913 * because we don't guarantee that the
2914 * data is actually valid at this point.
2915 * The page will get decrypted in
2916 * vm_fault_wire() if needed.
2917 */
2918 }
2919 assert(!mem->gobbled);
2920 mem->wire_count++;
2921 VM_PAGE_CHECK(mem);
2922 }
2923
2924 /*
2925 * vm_page_gobble:
2926 *
2927 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2928 *
2929 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2930 */
2931 void
2932 vm_page_gobble(
2933 register vm_page_t mem)
2934 {
2935 vm_page_lockspin_queues();
2936 VM_PAGE_CHECK(mem);
2937
2938 assert(!mem->gobbled);
2939 assert( !VM_PAGE_WIRED(mem));
2940
2941 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2942 if (!mem->private && !mem->fictitious)
2943 vm_page_wire_count++;
2944 }
2945 vm_page_gobble_count++;
2946 mem->gobbled = TRUE;
2947 vm_page_unlock_queues();
2948 }
2949
2950 /*
2951 * vm_page_unwire:
2952 *
2953 * Release one wiring of this page, potentially
2954 * enabling it to be paged again.
2955 *
2956 * The page's object and the page queues must be locked.
2957 */
2958 void
2959 vm_page_unwire(
2960 vm_page_t mem,
2961 boolean_t queueit)
2962 {
2963
2964 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2965
2966 VM_PAGE_CHECK(mem);
2967 assert(VM_PAGE_WIRED(mem));
2968 assert(mem->object != VM_OBJECT_NULL);
2969 #if DEBUG
2970 vm_object_lock_assert_exclusive(mem->object);
2971 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2972 #endif
2973 if (--mem->wire_count == 0) {
2974 assert(!mem->private && !mem->fictitious);
2975 vm_page_wire_count--;
2976 assert(mem->object->wired_page_count > 0);
2977 mem->object->wired_page_count--;
2978 assert(mem->object->resident_page_count >=
2979 mem->object->wired_page_count);
2980 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2981 OSAddAtomic(+1, &vm_page_purgeable_count);
2982 assert(vm_page_purgeable_wired_count > 0);
2983 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2984 }
2985 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2986 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2987 mem->object->vo_purgeable_owner != TASK_NULL) {
2988 task_t owner;
2989
2990 owner = mem->object->vo_purgeable_owner;
2991 /* more volatile bytes */
2992 ledger_credit(owner->ledger,
2993 task_ledgers.purgeable_volatile,
2994 PAGE_SIZE);
2995 /* less not-quite-volatile bytes */
2996 ledger_debit(owner->ledger,
2997 task_ledgers.purgeable_nonvolatile,
2998 PAGE_SIZE);
2999 /* less footprint */
3000 ledger_debit(owner->ledger,
3001 task_ledgers.phys_footprint,
3002 PAGE_SIZE);
3003 }
3004 assert(mem->object != kernel_object);
3005 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3006
3007 if (queueit == TRUE) {
3008 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3009 vm_page_deactivate(mem);
3010 } else {
3011 vm_page_activate(mem);
3012 }
3013 }
3014
3015 VM_CHECK_MEMORYSTATUS;
3016
3017 }
3018 VM_PAGE_CHECK(mem);
3019 }
3020
3021 /*
3022 * vm_page_deactivate:
3023 *
3024 * Returns the given page to the inactive list,
3025 * indicating that no physical maps have access
3026 * to this page. [Used by the physical mapping system.]
3027 *
3028 * The page queues must be locked.
3029 */
3030 void
3031 vm_page_deactivate(
3032 vm_page_t m)
3033 {
3034 vm_page_deactivate_internal(m, TRUE);
3035 }
3036
3037
3038 void
3039 vm_page_deactivate_internal(
3040 vm_page_t m,
3041 boolean_t clear_hw_reference)
3042 {
3043
3044 VM_PAGE_CHECK(m);
3045 assert(m->object != kernel_object);
3046 assert(m->phys_page != vm_page_guard_addr);
3047
3048 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3049 #if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052 /*
3053 * This page is no longer very interesting. If it was
3054 * interesting (active or inactive/referenced), then we
3055 * clear the reference bit and (re)enter it in the
3056 * inactive queue. Note wired pages should not have
3057 * their reference bit cleared.
3058 */
3059 assert ( !(m->absent && !m->unusual));
3060
3061 if (m->gobbled) { /* can this happen? */
3062 assert( !VM_PAGE_WIRED(m));
3063
3064 if (!m->private && !m->fictitious)
3065 vm_page_wire_count--;
3066 vm_page_gobble_count--;
3067 m->gobbled = FALSE;
3068 }
3069 /*
3070 * if this page is currently on the pageout queue, we can't do the
3071 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3072 * and we can't remove it manually since we would need the object lock
3073 * (which is not required here) to decrement the activity_in_progress
3074 * reference which is held on the object while the page is in the pageout queue...
3075 * just let the normal laundry processing proceed
3076 */
3077 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3078 return;
3079
3080 if (!m->absent && clear_hw_reference == TRUE)
3081 pmap_clear_reference(m->phys_page);
3082
3083 m->reference = FALSE;
3084 m->no_cache = FALSE;
3085
3086 if (!m->inactive) {
3087 VM_PAGE_QUEUES_REMOVE(m);
3088
3089 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3090 m->dirty && m->object->internal &&
3091 (m->object->purgable == VM_PURGABLE_DENY ||
3092 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3093 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3094 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3095 m->throttled = TRUE;
3096 vm_page_throttled_count++;
3097 } else {
3098 if (m->object->named && m->object->ref_count == 1) {
3099 vm_page_speculate(m, FALSE);
3100 #if DEVELOPMENT || DEBUG
3101 vm_page_speculative_recreated++;
3102 #endif
3103 } else {
3104 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3105 }
3106 }
3107 }
3108 }
3109
3110 /*
3111 * vm_page_enqueue_cleaned
3112 *
3113 * Put the page on the cleaned queue, mark it cleaned, etc.
3114 * Being on the cleaned queue (and having m->clean_queue set)
3115 * does ** NOT ** guarantee that the page is clean!
3116 *
3117 * Call with the queues lock held.
3118 */
3119
3120 void vm_page_enqueue_cleaned(vm_page_t m)
3121 {
3122 assert(m->phys_page != vm_page_guard_addr);
3123 #if DEBUG
3124 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3125 #endif
3126 assert( !(m->absent && !m->unusual));
3127
3128 if (m->gobbled) {
3129 assert( !VM_PAGE_WIRED(m));
3130 if (!m->private && !m->fictitious)
3131 vm_page_wire_count--;
3132 vm_page_gobble_count--;
3133 m->gobbled = FALSE;
3134 }
3135 /*
3136 * if this page is currently on the pageout queue, we can't do the
3137 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3138 * and we can't remove it manually since we would need the object lock
3139 * (which is not required here) to decrement the activity_in_progress
3140 * reference which is held on the object while the page is in the pageout queue...
3141 * just let the normal laundry processing proceed
3142 */
3143 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3144 return;
3145
3146 VM_PAGE_QUEUES_REMOVE(m);
3147
3148 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3149 m->clean_queue = TRUE;
3150 vm_page_cleaned_count++;
3151
3152 m->inactive = TRUE;
3153 vm_page_inactive_count++;
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
3159
3160 vm_pageout_enqueued_cleaned++;
3161 }
3162
3163 /*
3164 * vm_page_activate:
3165 *
3166 * Put the specified page on the active list (if appropriate).
3167 *
3168 * The page queues must be locked.
3169 */
3170
3171 void
3172 vm_page_activate(
3173 register vm_page_t m)
3174 {
3175 VM_PAGE_CHECK(m);
3176 #ifdef FIXME_4778297
3177 assert(m->object != kernel_object);
3178 #endif
3179 assert(m->phys_page != vm_page_guard_addr);
3180 #if DEBUG
3181 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3182 #endif
3183 assert( !(m->absent && !m->unusual));
3184
3185 if (m->gobbled) {
3186 assert( !VM_PAGE_WIRED(m));
3187 if (!m->private && !m->fictitious)
3188 vm_page_wire_count--;
3189 vm_page_gobble_count--;
3190 m->gobbled = FALSE;
3191 }
3192 /*
3193 * if this page is currently on the pageout queue, we can't do the
3194 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3195 * and we can't remove it manually since we would need the object lock
3196 * (which is not required here) to decrement the activity_in_progress
3197 * reference which is held on the object while the page is in the pageout queue...
3198 * just let the normal laundry processing proceed
3199 */
3200 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3201 return;
3202
3203 #if DEBUG
3204 if (m->active)
3205 panic("vm_page_activate: already active");
3206 #endif
3207
3208 if (m->speculative) {
3209 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3210 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3211 }
3212
3213 VM_PAGE_QUEUES_REMOVE(m);
3214
3215 if ( !VM_PAGE_WIRED(m)) {
3216
3217 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3218 m->dirty && m->object->internal &&
3219 (m->object->purgable == VM_PURGABLE_DENY ||
3220 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3221 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3222 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3223 m->throttled = TRUE;
3224 vm_page_throttled_count++;
3225 } else {
3226 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3227 m->active = TRUE;
3228 vm_page_active_count++;
3229 if (m->object->internal) {
3230 vm_page_pageable_internal_count++;
3231 } else {
3232 vm_page_pageable_external_count++;
3233 }
3234 }
3235 m->reference = TRUE;
3236 m->no_cache = FALSE;
3237 }
3238 VM_PAGE_CHECK(m);
3239 }
3240
3241
3242 /*
3243 * vm_page_speculate:
3244 *
3245 * Put the specified page on the speculative list (if appropriate).
3246 *
3247 * The page queues must be locked.
3248 */
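/*
 * NOTE: speculative pages are binned by age.  A new page goes into
 * the bucket selected by speculative_age_index; each bucket is given
 * a deadline of vm_page_speculative_q_age_ms, and once that deadline
 * passes the bucket's pages are shifted onto the "aged" queue (see
 * vm_page_speculate_ageit()) that pageout_scan steals from.
 */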
3249 void
3250 vm_page_speculate(
3251 vm_page_t m,
3252 boolean_t new)
3253 {
3254 struct vm_speculative_age_q *aq;
3255
3256 VM_PAGE_CHECK(m);
3257 assert(m->object != kernel_object);
3258 assert(m->phys_page != vm_page_guard_addr);
3259 #if DEBUG
3260 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3261 #endif
3262 assert( !(m->absent && !m->unusual));
3263
3264 /*
3265 * if this page is currently on the pageout queue, we can't do the
3266 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3267 * and we can't remove it manually since we would need the object lock
3268 * (which is not required here) to decrement the activity_in_progress
3269 * reference which is held on the object while the page is in the pageout queue...
3270 * just let the normal laundry processing proceed
3271 */
3272 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3273 return;
3274
3275 VM_PAGE_QUEUES_REMOVE(m);
3276
3277 if ( !VM_PAGE_WIRED(m)) {
3278 mach_timespec_t ts;
3279 clock_sec_t sec;
3280 clock_nsec_t nsec;
3281
3282 clock_get_system_nanotime(&sec, &nsec);
3283 ts.tv_sec = (unsigned int) sec;
3284 ts.tv_nsec = nsec;
3285
3286 if (vm_page_speculative_count == 0) {
3287
3288 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3290
3291 aq = &vm_page_queue_speculative[speculative_age_index];
3292
3293 /*
3294 * set the timer to begin a new group
3295 */
3296 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3297 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3298
3299 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3300 } else {
3301 aq = &vm_page_queue_speculative[speculative_age_index];
3302
3303 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3304
3305 speculative_age_index++;
3306
3307 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3308 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3309 if (speculative_age_index == speculative_steal_index) {
3310 speculative_steal_index = speculative_age_index + 1;
3311
3312 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3313 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3314 }
3315 aq = &vm_page_queue_speculative[speculative_age_index];
3316
3317 if (!queue_empty(&aq->age_q))
3318 vm_page_speculate_ageit(aq);
3319
3320 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3321 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3322
3323 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3324 }
3325 }
3326 enqueue_tail(&aq->age_q, &m->pageq);
3327 m->speculative = TRUE;
3328 vm_page_speculative_count++;
3329 if (m->object->internal) {
3330 vm_page_pageable_internal_count++;
3331 } else {
3332 vm_page_pageable_external_count++;
3333 }
3334
3335 if (new == TRUE) {
3336 vm_object_lock_assert_exclusive(m->object);
3337
3338 m->object->pages_created++;
3339 #if DEVELOPMENT || DEBUG
3340 vm_page_speculative_created++;
3341 #endif
3342 }
3343 }
3344 VM_PAGE_CHECK(m);
3345 }
3346
3347
3348 /*
3349 * move pages from the specified aging bin to
3350 * the speculative bin that pageout_scan claims from
3351 *
3352 * The page queues must be locked.
3353 */
3354 void
3355 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3356 {
3357 struct vm_speculative_age_q *sq;
3358 vm_page_t t;
3359
3360 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3361
3362 if (queue_empty(&sq->age_q)) {
3363 sq->age_q.next = aq->age_q.next;
3364 sq->age_q.prev = aq->age_q.prev;
3365
3366 t = (vm_page_t)sq->age_q.next;
3367 t->pageq.prev = &sq->age_q;
3368
3369 t = (vm_page_t)sq->age_q.prev;
3370 t->pageq.next = &sq->age_q;
3371 } else {
3372 t = (vm_page_t)sq->age_q.prev;
3373 t->pageq.next = aq->age_q.next;
3374
3375 t = (vm_page_t)aq->age_q.next;
3376 t->pageq.prev = sq->age_q.prev;
3377
3378 t = (vm_page_t)aq->age_q.prev;
3379 t->pageq.next = &sq->age_q;
3380
3381 sq->age_q.prev = aq->age_q.prev;
3382 }
3383 queue_init(&aq->age_q);
3384 }
3385
3386
3387 void
3388 vm_page_lru(
3389 vm_page_t m)
3390 {
3391 VM_PAGE_CHECK(m);
3392 assert(m->object != kernel_object);
3393 assert(m->phys_page != vm_page_guard_addr);
3394
3395 #if DEBUG
3396 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3397 #endif
3398 /*
3399 * if this page is currently on the pageout queue, we can't do the
3400 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3401 * and we can't remove it manually since we would need the object lock
3402 * (which is not required here) to decrement the activity_in_progress
3403 * reference which is held on the object while the page is in the pageout queue...
3404 * just let the normal laundry processing proceed
3405 */
3406 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3407 return;
3408
3409 m->no_cache = FALSE;
3410
3411 VM_PAGE_QUEUES_REMOVE(m);
3412
3413 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3414 }
3415
3416
3417 void
3418 vm_page_reactivate_all_throttled(void)
3419 {
3420 vm_page_t first_throttled, last_throttled;
3421 vm_page_t first_active;
3422 vm_page_t m;
3423 int extra_active_count;
3424 int extra_internal_count, extra_external_count;
3425
3426 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3427 return;
3428
3429 extra_active_count = 0;
3430 extra_internal_count = 0;
3431 extra_external_count = 0;
3432 vm_page_lock_queues();
3433 if (! queue_empty(&vm_page_queue_throttled)) {
3434 /*
3435 * Switch "throttled" pages to "active".
3436 */
3437 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3438 VM_PAGE_CHECK(m);
3439 assert(m->throttled);
3440 assert(!m->active);
3441 assert(!m->inactive);
3442 assert(!m->speculative);
3443 assert(!VM_PAGE_WIRED(m));
3444
3445 extra_active_count++;
3446 if (m->object->internal) {
3447 extra_internal_count++;
3448 } else {
3449 extra_external_count++;
3450 }
3451
3452 m->throttled = FALSE;
3453 m->active = TRUE;
3454 VM_PAGE_CHECK(m);
3455 }
3456
3457 /*
3458 * Transfer the entire throttled queue to the regular LRU page queues.
3459 * We insert it at the head of the active queue, so that these pages
3460 * get re-evaluated by the LRU algorithm first, since they've been
3461 * completely out of it until now.
3462 */
3463 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3464 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3465 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3466 if (queue_empty(&vm_page_queue_active)) {
3467 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3468 } else {
3469 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3470 }
3471 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3472 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3473 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3474
3475 #if DEBUG
3476 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3477 #endif
3478 queue_init(&vm_page_queue_throttled);
3479 /*
3480 * Adjust the global page counts.
3481 */
3482 vm_page_active_count += extra_active_count;
3483 vm_page_pageable_internal_count += extra_internal_count;
3484 vm_page_pageable_external_count += extra_external_count;
3485 vm_page_throttled_count = 0;
3486 }
3487 assert(vm_page_throttled_count == 0);
3488 assert(queue_empty(&vm_page_queue_throttled));
3489 vm_page_unlock_queues();
3490 }
3491
3492
3493 /*
3494 * move pages from the indicated local queue to the global active queue
3495 * it's ok to fail if we're below the hard limit and force == FALSE
3496 * the nolocks == TRUE case is to allow this function to be run on
3497 * the hibernate path
3498 */
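/*
 * NOTE: vm_page_local_q[lid] is a per-cpu staging queue of pages
 * destined for the active list; this routine splices that whole
 * queue onto the head of the global active queue.  When force ==
 * FALSE and the local queue is still under its hard limit we only
 * try-lock the page queues and bail if they are busy.
 */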
3499
3500 void
3501 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3502 {
3503 struct vpl *lq;
3504 vm_page_t first_local, last_local;
3505 vm_page_t first_active;
3506 vm_page_t m;
3507 uint32_t count = 0;
3508
3509 if (vm_page_local_q == NULL)
3510 return;
3511
3512 lq = &vm_page_local_q[lid].vpl_un.vpl;
3513
3514 if (nolocks == FALSE) {
3515 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3516 if ( !vm_page_trylockspin_queues())
3517 return;
3518 } else
3519 vm_page_lockspin_queues();
3520
3521 VPL_LOCK(&lq->vpl_lock);
3522 }
3523 if (lq->vpl_count) {
3524 /*
3525 * Switch "local" pages to "active".
3526 */
3527 assert(!queue_empty(&lq->vpl_queue));
3528
3529 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3530 VM_PAGE_CHECK(m);
3531 assert(m->local);
3532 assert(!m->active);
3533 assert(!m->inactive);
3534 assert(!m->speculative);
3535 assert(!VM_PAGE_WIRED(m));
3536 assert(!m->throttled);
3537 assert(!m->fictitious);
3538
3539 if (m->local_id != lid)
3540 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3541
3542 m->local_id = 0;
3543 m->local = FALSE;
3544 m->active = TRUE;
3545 VM_PAGE_CHECK(m);
3546
3547 count++;
3548 }
3549 if (count != lq->vpl_count)
3550 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3551
3552 /*
3553 * Transfer the entire local queue to the regular LRU page queues.
3554 */
3555 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3556 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3557 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3558
3559 if (queue_empty(&vm_page_queue_active)) {
3560 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3561 } else {
3562 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3563 }
3564 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3565 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3566 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3567
3568 queue_init(&lq->vpl_queue);
3569 /*
3570 * Adjust the global page counts.
3571 */
3572 vm_page_active_count += lq->vpl_count;
3573 vm_page_pageable_internal_count += lq->vpl_internal_count;
3574 vm_page_pageable_external_count += lq->vpl_external_count;
3575 lq->vpl_count = 0;
3576 lq->vpl_internal_count = 0;
3577 lq->vpl_external_count = 0;
3578 }
3579 assert(queue_empty(&lq->vpl_queue));
3580
3581 if (nolocks == FALSE) {
3582 VPL_UNLOCK(&lq->vpl_lock);
3583 vm_page_unlock_queues();
3584 }
3585 }
3586
3587 /*
3588 * vm_page_part_zero_fill:
3589 *
3590 * Zero-fill a part of the page.
3591 */
3592 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3593 void
3594 vm_page_part_zero_fill(
3595 vm_page_t m,
3596 vm_offset_t m_pa,
3597 vm_size_t len)
3598 {
3599
3600 #if 0
3601 /*
3602 * we don't hold the page queue lock
3603 * so this check isn't safe to make
3604 */
3605 VM_PAGE_CHECK(m);
3606 #endif
3607
3608 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3609 pmap_zero_part_page(m->phys_page, m_pa, len);
3610 #else
3611 vm_page_t tmp;
3612 while (1) {
3613 tmp = vm_page_grab();
3614 if (tmp == VM_PAGE_NULL) {
3615 vm_page_wait(THREAD_UNINT);
3616 continue;
3617 }
3618 break;
3619 }
3620 vm_page_zero_fill(tmp);
3621 if(m_pa != 0) {
3622 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3623 }
3624 if((m_pa + len) < PAGE_SIZE) {
3625 vm_page_part_copy(m, m_pa + len, tmp,
3626 m_pa + len, PAGE_SIZE - (m_pa + len));
3627 }
3628 vm_page_copy(tmp,m);
3629 VM_PAGE_FREE(tmp);
3630 #endif
3631
3632 }
3633
3634 /*
3635 * vm_page_zero_fill:
3636 *
3637 * Zero-fill the specified page.
3638 */
3639 void
3640 vm_page_zero_fill(
3641 vm_page_t m)
3642 {
3643 XPR(XPR_VM_PAGE,
3644 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3645 m->object, m->offset, m, 0,0);
3646 #if 0
3647 /*
3648 * we don't hold the page queue lock
3649 * so this check isn't safe to make
3650 */
3651 VM_PAGE_CHECK(m);
3652 #endif
3653
3654 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3655 pmap_zero_page(m->phys_page);
3656 }
3657
3658 /*
3659 * vm_page_part_copy:
3660 *
3661 * copy part of one page to another
3662 */
3663
3664 void
3665 vm_page_part_copy(
3666 vm_page_t src_m,
3667 vm_offset_t src_pa,
3668 vm_page_t dst_m,
3669 vm_offset_t dst_pa,
3670 vm_size_t len)
3671 {
3672 #if 0
3673 /*
3674 * we don't hold the page queue lock
3675 * so this check isn't safe to make
3676 */
3677 VM_PAGE_CHECK(src_m);
3678 VM_PAGE_CHECK(dst_m);
3679 #endif
3680 pmap_copy_part_page(src_m->phys_page, src_pa,
3681 dst_m->phys_page, dst_pa, len);
3682 }
3683
3684 /*
3685 * vm_page_copy:
3686 *
3687 * Copy one page to another
3688 *
3689 * ENCRYPTED SWAP:
3690 * The source page should not be encrypted. The caller should
3691 * make sure the page is decrypted first, if necessary.
3692 */
3693
3694 int vm_page_copy_cs_validations = 0;
3695 int vm_page_copy_cs_tainted = 0;
3696
3697 void
3698 vm_page_copy(
3699 vm_page_t src_m,
3700 vm_page_t dest_m)
3701 {
3702 XPR(XPR_VM_PAGE,
3703 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3704 src_m->object, src_m->offset,
3705 dest_m->object, dest_m->offset,
3706 0);
3707 #if 0
3708 /*
3709 * we don't hold the page queue lock
3710 * so this check isn't safe to make
3711 */
3712 VM_PAGE_CHECK(src_m);
3713 VM_PAGE_CHECK(dest_m);
3714 #endif
3715 vm_object_lock_assert_held(src_m->object);
3716
3717 /*
3718 * ENCRYPTED SWAP:
3719 * The source page should not be encrypted at this point.
3720 * The destination page will therefore not contain encrypted
3721 * data after the copy.
3722 */
3723 if (src_m->encrypted) {
3724 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3725 }
3726 dest_m->encrypted = FALSE;
3727
3728 if (src_m->object != VM_OBJECT_NULL &&
3729 src_m->object->code_signed) {
3730 /*
3731 * We're copying a page from a code-signed object.
3732 * Whoever ends up mapping the copy page might care about
3733 * the original page's integrity, so let's validate the
3734 * source page now.
3735 */
3736 vm_page_copy_cs_validations++;
3737 vm_page_validate_cs(src_m);
3738 }
3739
3740 if (vm_page_is_slideable(src_m)) {
3741 boolean_t was_busy = src_m->busy;
3742 src_m->busy = TRUE;
3743 (void) vm_page_slide(src_m, 0);
3744 assert(src_m->busy);
3745 if (!was_busy) {
3746 PAGE_WAKEUP_DONE(src_m);
3747 }
3748 }
3749
3750 /*
3751 * Propagate the cs_tainted bit to the copy page. Do not propagate
3752 * the cs_validated bit.
3753 */
3754 dest_m->cs_tainted = src_m->cs_tainted;
3755 if (dest_m->cs_tainted) {
3756 vm_page_copy_cs_tainted++;
3757 }
3758 dest_m->slid = src_m->slid;
3759 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3760 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3761 }
3762
3763 #if MACH_ASSERT
3764 static void
3765 _vm_page_print(
3766 vm_page_t p)
3767 {
3768 printf("vm_page %p: \n", p);
3769 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3770 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3771 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3772 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3773 printf(" wire_count=%u\n", p->wire_count);
3774
3775 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3776 (p->local ? "" : "!"),
3777 (p->inactive ? "" : "!"),
3778 (p->active ? "" : "!"),
3779 (p->pageout_queue ? "" : "!"),
3780 (p->speculative ? "" : "!"),
3781 (p->laundry ? "" : "!"));
3782 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3783 (p->free ? "" : "!"),
3784 (p->reference ? "" : "!"),
3785 (p->gobbled ? "" : "!"),
3786 (p->private ? "" : "!"),
3787 (p->throttled ? "" : "!"));
3788 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3789 (p->busy ? "" : "!"),
3790 (p->wanted ? "" : "!"),
3791 (p->tabled ? "" : "!"),
3792 (p->fictitious ? "" : "!"),
3793 (p->pmapped ? "" : "!"),
3794 (p->wpmapped ? "" : "!"));
3795 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3796 (p->pageout ? "" : "!"),
3797 (p->absent ? "" : "!"),
3798 (p->error ? "" : "!"),
3799 (p->dirty ? "" : "!"),
3800 (p->cleaning ? "" : "!"),
3801 (p->precious ? "" : "!"),
3802 (p->clustered ? "" : "!"));
3803 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3804 (p->overwriting ? "" : "!"),
3805 (p->restart ? "" : "!"),
3806 (p->unusual ? "" : "!"),
3807 (p->encrypted ? "" : "!"),
3808 (p->encrypted_cleaning ? "" : "!"));
3809 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
3810 (p->cs_validated ? "" : "!"),
3811 (p->cs_tainted ? "" : "!"),
3812 (p->cs_nx ? "" : "!"),
3813 (p->no_cache ? "" : "!"));
3814
3815 printf("phys_page=0x%x\n", p->phys_page);
3816 }
3817
3818 /*
3819 * Check that the list of pages is ordered by
3820 * ascending physical address and has no holes.
3821 */
3822 static int
3823 vm_page_verify_contiguous(
3824 vm_page_t pages,
3825 unsigned int npages)
3826 {
3827 register vm_page_t m;
3828 unsigned int page_count;
3829 vm_offset_t prev_addr;
3830
3831 prev_addr = pages->phys_page;
3832 page_count = 1;
3833 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3834 if (m->phys_page != prev_addr + 1) {
3835 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3836 m, (long)prev_addr, m->phys_page);
3837 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3838 panic("vm_page_verify_contiguous: not contiguous!");
3839 }
3840 prev_addr = m->phys_page;
3841 ++page_count;
3842 }
3843 if (page_count != npages) {
3844 printf("pages %p actual count 0x%x but requested 0x%x\n",
3845 pages, page_count, npages);
3846 panic("vm_page_verify_contiguous: count error");
3847 }
3848 return 1;
3849 }
3850
3851
3852 /*
3853 * Check the free lists for proper length etc.
3854 */
3855 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3856 static unsigned int
3857 vm_page_verify_free_list(
3858 queue_head_t *vm_page_queue,
3859 unsigned int color,
3860 vm_page_t look_for_page,
3861 boolean_t expect_page)
3862 {
3863 unsigned int npages;
3864 vm_page_t m;
3865 vm_page_t prev_m;
3866 boolean_t found_page;
3867
3868 if (! vm_page_verify_this_free_list_enabled)
3869 return 0;
3870
3871 found_page = FALSE;
3872 npages = 0;
3873 prev_m = (vm_page_t) vm_page_queue;
3874 queue_iterate(vm_page_queue,
3875 m,
3876 vm_page_t,
3877 pageq) {
3878
3879 if (m == look_for_page) {
3880 found_page = TRUE;
3881 }
3882 if ((vm_page_t) m->pageq.prev != prev_m)
3883 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3884 color, npages, m, m->pageq.prev, prev_m);
3885 if ( ! m->busy )
3886 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3887 color, npages, m);
3888 if (color != (unsigned int) -1) {
3889 if ((m->phys_page & vm_color_mask) != color)
3890 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3891 color, npages, m, m->phys_page & vm_color_mask, color);
3892 if ( ! m->free )
3893 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3894 color, npages, m);
3895 }
3896 ++npages;
3897 prev_m = m;
3898 }
3899 if (look_for_page != VM_PAGE_NULL) {
3900 unsigned int other_color;
3901
3902 if (expect_page && !found_page) {
3903 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3904 color, npages, look_for_page, look_for_page->phys_page);
3905 _vm_page_print(look_for_page);
3906 for (other_color = 0;
3907 other_color < vm_colors;
3908 other_color++) {
3909 if (other_color == color)
3910 continue;
3911 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3912 other_color, look_for_page, FALSE);
3913 }
3914 if (color == (unsigned int) -1) {
3915 vm_page_verify_free_list(&vm_lopage_queue_free,
3916 (unsigned int) -1, look_for_page, FALSE);
3917 }
3918 panic("vm_page_verify_free_list(color=%u)\n", color);
3919 }
3920 if (!expect_page && found_page) {
3921 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3922 color, npages, look_for_page, look_for_page->phys_page);
3923 }
3924 }
3925 return npages;
3926 }
3927
3928 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
3929 static void
3930 vm_page_verify_free_lists( void )
3931 {
3932 unsigned int color, npages, nlopages;
3933 boolean_t toggle = TRUE;
3934
3935 if (! vm_page_verify_all_free_lists_enabled)
3936 return;
3937
3938 npages = 0;
3939
3940 lck_mtx_lock(&vm_page_queue_free_lock);
3941
3942 if (vm_page_verify_this_free_list_enabled == TRUE) {
3943 /*
3944 * This variable has been set globally for extra checking of
3945 * each free list Q. Since we didn't set it, we don't own it
3946 * and we shouldn't toggle it.
3947 */
3948 toggle = FALSE;
3949 }
3950
3951 if (toggle == TRUE) {
3952 vm_page_verify_this_free_list_enabled = TRUE;
3953 }
3954
3955 for( color = 0; color < vm_colors; color++ ) {
3956 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3957 color, VM_PAGE_NULL, FALSE);
3958 }
3959 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3960 (unsigned int) -1,
3961 VM_PAGE_NULL, FALSE);
3962 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3963 panic("vm_page_verify_free_lists: "
3964 "npages %u free_count %d nlopages %u lo_free_count %u",
3965 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3966
3967 if (toggle == TRUE) {
3968 vm_page_verify_this_free_list_enabled = FALSE;
3969 }
3970
3971 lck_mtx_unlock(&vm_page_queue_free_lock);
3972 }
3973
3974 void
3975 vm_page_queues_assert(
3976 vm_page_t mem,
3977 int val)
3978 {
3979 #if DEBUG
3980 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3981 #endif
3982 if (mem->free + mem->active + mem->inactive + mem->speculative +
3983 mem->throttled + mem->pageout_queue > (val)) {
3984 _vm_page_print(mem);
3985 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3986 }
3987 if (VM_PAGE_WIRED(mem)) {
3988 assert(!mem->active);
3989 assert(!mem->inactive);
3990 assert(!mem->speculative);
3991 assert(!mem->throttled);
3992 assert(!mem->pageout_queue);
3993 }
3994 }
3995 #endif /* MACH_ASSERT */
3996
3997
3998 /*
3999 * CONTIGUOUS PAGE ALLOCATION
4000 *
4001 * Find a region large enough to contain at least n pages
4002 * of contiguous physical memory.
4003 *
4004 * This is done by traversing the vm_page_t array in a linear fashion
4005 * we assume that the vm_page_t array has the available physical pages in an
4006 * ordered, ascending list... this is currently true of all our implementations
4007 * and must remain so... there can be 'holes' in the array... we also can
4008 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4009 * which used to happen via 'vm_page_convert'... that function was no longer
4010 * being called and was removed...
4011 *
4012 * The basic flow consists of stabilizing some of the interesting state of
4013 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4014 * sweep at the beginning of the array looking for pages that meet our criteria
4015 * for a 'stealable' page... currently we are pretty conservative... if the page
4016 * meets this criteria and is physically contiguous to the previous page in the 'run'
4017 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4018 * and start to develop a new run... if at this point we've already considered
4019 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4020 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4021 * to other threads trying to acquire free pages (or move pages from q to q),
4022 * and then continue from the spot we left off... we only make 1 pass through the
4023 * array. Once we have a 'run' that is long enough, we'll go into the loop
4024 * which steals the pages from the queues they're currently on... pages on the free
4025 * queue can be stolen directly... pages that are on any of the other queues
4026 * must be removed from the object they are tabled on... this requires taking the
4027 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4028 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4029 * dump the pages we've currently stolen back to the free list, and pick up our
4030 * scan from the point where we aborted the 'current' run.
4031 *
4032 *
4033 * Requirements:
4034 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4035 *
4036 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4037 *
4038 * Algorithm:
4039 */
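
/*
 * A condensed sketch of the scan described above, for orientation only;
 * the authoritative logic is in vm_page_find_contiguous() below.
 *
 *	lock vm_page_queues + vm_page_queue_free_lock
 *	for (page_idx = last stopping point; npages < contig_pages; page_idx++) {
 *		m = &vm_pages[page_idx];
 *		if (m is wired, laundry, busy or otherwise not 'stealable')
 *			RESET_STATE_OF_RUN();
 *		else if (m->phys_page == prevcontaddr + 1)
 *			npages++;			// run keeps growing
 *		else
 *			start a new run at page_idx;	// npages = 1
 *		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1)
 *			drop both locks, mutex_pause(), retake locks, reset the run
 *	}
 *	if (npages == contig_pages)
 *		steal the run: free pages directly, the rest via their object lock
 *	else
 *		wrap around to index 0 once, or give up
 */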
4040
4041 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4042
4043
4044 #define RESET_STATE_OF_RUN() \
4045 MACRO_BEGIN \
4046 prevcontaddr = -2; \
4047 start_pnum = -1; \
4048 free_considered = 0; \
4049 substitute_needed = 0; \
4050 npages = 0; \
4051 MACRO_END
4052
4053 /*
4054 * Can we steal in-use (i.e. not free) pages when searching for
4055 * physically-contiguous pages ?
4056 */
4057 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4058
4059 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4060 #if DEBUG
4061 int vm_page_find_contig_debug = 0;
4062 #endif
4063
4064 static vm_page_t
4065 vm_page_find_contiguous(
4066 unsigned int contig_pages,
4067 ppnum_t max_pnum,
4068 ppnum_t pnum_mask,
4069 boolean_t wire,
4070 int flags)
4071 {
4072 vm_page_t m = NULL;
4073 ppnum_t prevcontaddr;
4074 ppnum_t start_pnum;
4075 unsigned int npages, considered, scanned;
4076 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4077 unsigned int idx_last_contig_page_found = 0;
4078 int free_considered, free_available;
4079 int substitute_needed;
4080 boolean_t wrapped;
4081 #if DEBUG
4082 clock_sec_t tv_start_sec, tv_end_sec;
4083 clock_usec_t tv_start_usec, tv_end_usec;
4084 #endif
4085 #if MACH_ASSERT
4086 int yielded = 0;
4087 int dumped_run = 0;
4088 int stolen_pages = 0;
4089 int compressed_pages = 0;
4090 #endif
4091
4092 if (contig_pages == 0)
4093 return VM_PAGE_NULL;
4094
4095 #if MACH_ASSERT
4096 vm_page_verify_free_lists();
4097 #endif
4098 #if DEBUG
4099 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4100 #endif
4101 PAGE_REPLACEMENT_ALLOWED(TRUE);
4102
4103 vm_page_lock_queues();
4104 lck_mtx_lock(&vm_page_queue_free_lock);
4105
4106 RESET_STATE_OF_RUN();
4107
4108 scanned = 0;
4109 considered = 0;
4110 free_available = vm_page_free_count - vm_page_free_reserved;
4111
4112 wrapped = FALSE;
4113
4114 if(flags & KMA_LOMEM)
4115 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4116 else
4117 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4118
4119 orig_last_idx = idx_last_contig_page_found;
4120 last_idx = orig_last_idx;
4121
4122 for (page_idx = last_idx, start_idx = last_idx;
4123 npages < contig_pages && page_idx < vm_pages_count;
4124 page_idx++) {
4125 retry:
4126 if (wrapped &&
4127 npages == 0 &&
4128 page_idx >= orig_last_idx) {
4129 /*
4130 * We're back where we started and we haven't
4131 * found any suitable contiguous range. Let's
4132 * give up.
4133 */
4134 break;
4135 }
4136 scanned++;
4137 m = &vm_pages[page_idx];
4138
4139 assert(!m->fictitious);
4140 assert(!m->private);
4141
4142 if (max_pnum && m->phys_page > max_pnum) {
4143 /* no more low pages... */
4144 break;
4145 }
4146 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
4147 /*
4148 * not aligned
4149 */
4150 RESET_STATE_OF_RUN();
4151
4152 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4153 m->encrypted_cleaning ||
4154 m->pageout_queue || m->laundry || m->wanted ||
4155 m->cleaning || m->overwriting || m->pageout) {
4156 /*
4157 * page is in a transient state
4158 * or a state we don't want to deal
4159 * with, so don't consider it which
4160 * means starting a new run
4161 */
4162 RESET_STATE_OF_RUN();
4163
4164 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4165 /*
4166 * page needs to be on one of our queues
4167 * or it needs to belong to the compressor pool
4168 * in order for it to be stable behind the
4169 * locks we hold at this point...
4170 * if not, don't consider it which
4171 * means starting a new run
4172 */
4173 RESET_STATE_OF_RUN();
4174
4175 } else if (!m->free && (!m->tabled || m->busy)) {
4176 /*
4177 * pages on the free list are always 'busy'
4178 * so we couldn't test for 'busy' in the check
4179 * for the transient states... pages that are
4180 * 'free' are never 'tabled', so we also couldn't
4181 * test for 'tabled'. So we check here to make
4182 * sure that a non-free page is not busy and is
4183 * tabled on an object...
4184 * if not, don't consider it which
4185 * means starting a new run
4186 */
4187 RESET_STATE_OF_RUN();
4188
4189 } else {
4190 if (m->phys_page != prevcontaddr + 1) {
4191 if ((m->phys_page & pnum_mask) != 0) {
4192 RESET_STATE_OF_RUN();
4193 goto did_consider;
4194 } else {
4195 npages = 1;
4196 start_idx = page_idx;
4197 start_pnum = m->phys_page;
4198 }
4199 } else {
4200 npages++;
4201 }
4202 prevcontaddr = m->phys_page;
4203
4204 VM_PAGE_CHECK(m);
4205 if (m->free) {
4206 free_considered++;
4207 } else {
4208 /*
4209 * This page is not free.
4210 * If we can't steal used pages,
4211 * we have to give up this run
4212 * and keep looking.
4213 * Otherwise, we might need to
4214 * move the contents of this page
4215 * into a substitute page.
4216 */
4217 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4218 if (m->pmapped || m->dirty || m->precious) {
4219 substitute_needed++;
4220 }
4221 #else
4222 RESET_STATE_OF_RUN();
4223 #endif
4224 }
4225
4226 if ((free_considered + substitute_needed) > free_available) {
4227 /*
4228 * if we let this run continue
4229 * we will end up dropping the vm_page_free_count
4230 * below the reserve limit... we need to abort
4231 * this run, but we can at least re-consider this
4232 * page... thus the jump back to 'retry'
4233 */
4234 RESET_STATE_OF_RUN();
4235
4236 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4237 considered++;
4238 goto retry;
4239 }
4240 /*
4241 * free_available == 0
4242 * so can't consider any free pages... if
4243 * we went to retry in this case, we'd
4244 * get stuck looking at the same page
4245 * w/o making any forward progress
4246 * we also want to take this path if we've already
4247 * reached our limit that controls the lock latency
4248 */
4249 }
4250 }
4251 did_consider:
4252 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4253
4254 PAGE_REPLACEMENT_ALLOWED(FALSE);
4255
4256 lck_mtx_unlock(&vm_page_queue_free_lock);
4257 vm_page_unlock_queues();
4258
4259 mutex_pause(0);
4260
4261 PAGE_REPLACEMENT_ALLOWED(TRUE);
4262
4263 vm_page_lock_queues();
4264 lck_mtx_lock(&vm_page_queue_free_lock);
4265
4266 RESET_STATE_OF_RUN();
4267 /*
4268 * reset our free page limit since we
4269 * dropped the lock protecting the vm_page_free_queue
4270 */
4271 free_available = vm_page_free_count - vm_page_free_reserved;
4272 considered = 0;
4273 #if MACH_ASSERT
4274 yielded++;
4275 #endif
4276 goto retry;
4277 }
4278 considered++;
4279 }
4280 m = VM_PAGE_NULL;
4281
4282 if (npages != contig_pages) {
4283 if (!wrapped) {
4284 /*
4285 * We didn't find a contiguous range but we didn't
4286 * start from the very first page.
4287 * Start again from the very first page.
4288 */
4289 RESET_STATE_OF_RUN();
4290 if( flags & KMA_LOMEM)
4291 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4292 else
4293 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4294 last_idx = 0;
4295 page_idx = last_idx;
4296 wrapped = TRUE;
4297 goto retry;
4298 }
4299 lck_mtx_unlock(&vm_page_queue_free_lock);
4300 } else {
4301 vm_page_t m1;
4302 vm_page_t m2;
4303 unsigned int cur_idx;
4304 unsigned int tmp_start_idx;
4305 vm_object_t locked_object = VM_OBJECT_NULL;
4306 boolean_t abort_run = FALSE;
4307
4308 assert(page_idx - start_idx == contig_pages);
4309
4310 tmp_start_idx = start_idx;
4311
4312 /*
4313 * first pass through to pull the free pages
4314 * off of the free queue so that in case we
4315 * need substitute pages, we won't grab any
4316 * of the free pages in the run... we'll clear
4317 * the 'free' bit in the 2nd pass, and even in
4318 * an abort_run case, we'll collect all of the
4319 * free pages in this run and return them to the free list
4320 */
4321 while (start_idx < page_idx) {
4322
4323 m1 = &vm_pages[start_idx++];
4324
4325 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4326 assert(m1->free);
4327 #endif
4328
4329 if (m1->free) {
4330 unsigned int color;
4331
4332 color = m1->phys_page & vm_color_mask;
4333 #if MACH_ASSERT
4334 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4335 #endif
4336 queue_remove(&vm_page_queue_free[color],
4337 m1,
4338 vm_page_t,
4339 pageq);
4340 m1->pageq.next = NULL;
4341 m1->pageq.prev = NULL;
4342 #if MACH_ASSERT
4343 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4344 #endif
4345 /*
4346 * Clear the "free" bit so that this page
4347 * does not get considered for another
4348 * concurrent physically-contiguous allocation.
4349 */
4350 m1->free = FALSE;
4351 assert(m1->busy);
4352
4353 vm_page_free_count--;
4354 }
4355 }
4356 if( flags & KMA_LOMEM)
4357 vm_page_lomem_find_contiguous_last_idx = page_idx;
4358 else
4359 vm_page_find_contiguous_last_idx = page_idx;
4360
4361 /*
4362 * we can drop the free queue lock at this point since
4363 * we've pulled any 'free' candidates off of the list
4364 * we need it dropped so that we can do a vm_page_grab
4365 * when substituting for pmapped/dirty pages
4366 */
4367 lck_mtx_unlock(&vm_page_queue_free_lock);
4368
4369 start_idx = tmp_start_idx;
4370 cur_idx = page_idx - 1;
4371
4372 while (start_idx++ < page_idx) {
4373 /*
4374 * must go through the list from back to front
4375 * so that the page list is created in the
4376 * correct order - low -> high phys addresses
4377 */
4378 m1 = &vm_pages[cur_idx--];
4379
4380 assert(!m1->free);
4381
4382 if (m1->object == VM_OBJECT_NULL) {
4383 /*
4384 * page has already been removed from
4385 * the free list in the 1st pass
4386 */
4387 assert(m1->offset == (vm_object_offset_t) -1);
4388 assert(m1->busy);
4389 assert(!m1->wanted);
4390 assert(!m1->laundry);
4391 } else {
4392 vm_object_t object;
4393 int refmod;
4394 boolean_t disconnected, reusable;
4395
4396 if (abort_run == TRUE)
4397 continue;
4398
4399 object = m1->object;
4400
4401 if (object != locked_object) {
4402 if (locked_object) {
4403 vm_object_unlock(locked_object);
4404 locked_object = VM_OBJECT_NULL;
4405 }
4406 if (vm_object_lock_try(object))
4407 locked_object = object;
4408 }
4409 if (locked_object == VM_OBJECT_NULL ||
4410 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4411 m1->encrypted_cleaning ||
4412 m1->pageout_queue || m1->laundry || m1->wanted ||
4413 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4414
4415 if (locked_object) {
4416 vm_object_unlock(locked_object);
4417 locked_object = VM_OBJECT_NULL;
4418 }
4419 tmp_start_idx = cur_idx;
4420 abort_run = TRUE;
4421 continue;
4422 }
4423
4424 disconnected = FALSE;
4425 reusable = FALSE;
4426
4427 if ((m1->reusable ||
4428 m1->object->all_reusable) &&
4429 m1->inactive &&
4430 !m1->dirty &&
4431 !m1->reference) {
4432 /* reusable page... */
4433 refmod = pmap_disconnect(m1->phys_page);
4434 disconnected = TRUE;
4435 if (refmod == 0) {
4436 /*
4437 * ... not reused: can steal
4438 * without relocating contents.
4439 */
4440 reusable = TRUE;
4441 }
4442 }
4443
4444 if ((m1->pmapped &&
4445 ! reusable) ||
4446 m1->dirty ||
4447 m1->precious) {
4448 vm_object_offset_t offset;
4449
4450 m2 = vm_page_grab();
4451
4452 if (m2 == VM_PAGE_NULL) {
4453 if (locked_object) {
4454 vm_object_unlock(locked_object);
4455 locked_object = VM_OBJECT_NULL;
4456 }
4457 tmp_start_idx = cur_idx;
4458 abort_run = TRUE;
4459 continue;
4460 }
4461 if (! disconnected) {
4462 if (m1->pmapped)
4463 refmod = pmap_disconnect(m1->phys_page);
4464 else
4465 refmod = 0;
4466 }
4467
4468 /* copy the page's contents */
4469 pmap_copy_page(m1->phys_page, m2->phys_page);
4470 /* copy the page's state */
4471 assert(!VM_PAGE_WIRED(m1));
4472 assert(!m1->free);
4473 assert(!m1->pageout_queue);
4474 assert(!m1->laundry);
4475 m2->reference = m1->reference;
4476 assert(!m1->gobbled);
4477 assert(!m1->private);
4478 m2->no_cache = m1->no_cache;
4479 m2->xpmapped = 0;
4480 assert(!m1->busy);
4481 assert(!m1->wanted);
4482 assert(!m1->fictitious);
4483 m2->pmapped = m1->pmapped; /* should flush cache ? */
4484 m2->wpmapped = m1->wpmapped;
4485 assert(!m1->pageout);
4486 m2->absent = m1->absent;
4487 m2->error = m1->error;
4488 m2->dirty = m1->dirty;
4489 assert(!m1->cleaning);
4490 m2->precious = m1->precious;
4491 m2->clustered = m1->clustered;
4492 assert(!m1->overwriting);
4493 m2->restart = m1->restart;
4494 m2->unusual = m1->unusual;
4495 m2->encrypted = m1->encrypted;
4496 assert(!m1->encrypted_cleaning);
4497 m2->cs_validated = m1->cs_validated;
4498 m2->cs_tainted = m1->cs_tainted;
4499 m2->cs_nx = m1->cs_nx;
4500
4501 /*
4502 * If m1 had really been reusable,
4503 * we would have just stolen it, so
4504 * let's not propagate its "reusable"
4505 * bit and assert that m2 is not
4506 * marked as "reusable".
4507 */
4508 // m2->reusable = m1->reusable;
4509 assert(!m2->reusable);
4510
4511 assert(!m1->lopage);
4512 m2->slid = m1->slid;
4513 m2->compressor = m1->compressor;
4514
4515 /*
4516 * page may need to be flushed if
4517 * it is marshalled into a UPL
4518 * that is going to be used by a device
4519 * that doesn't support coherency
4520 */
4521 m2->written_by_kernel = TRUE;
4522
4523 /*
4524 * make sure we clear the ref/mod state
4525 * from the pmap layer... else we risk
4526 * inheriting state from the last time
4527 * this page was used...
4528 */
4529 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4530
4531 if (refmod & VM_MEM_REFERENCED)
4532 m2->reference = TRUE;
4533 if (refmod & VM_MEM_MODIFIED) {
4534 SET_PAGE_DIRTY(m2, TRUE);
4535 }
4536 offset = m1->offset;
4537
4538 /*
4539 * completely cleans up the state
4540 * of the page so that it is ready
4541 * to be put onto the free list, or
4542 * for this purpose it looks like it
4543 * just came off of the free list
4544 */
4545 vm_page_free_prepare(m1);
4546
4547 /*
4548 * now put the substitute page
4549 * on the object
4550 */
4551 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4552
4553 if (m2->compressor) {
4554 m2->pmapped = TRUE;
4555 m2->wpmapped = TRUE;
4556
4557 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4558 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4559 #if MACH_ASSERT
4560 compressed_pages++;
4561 #endif
4562 } else {
4563 if (m2->reference)
4564 vm_page_activate(m2);
4565 else
4566 vm_page_deactivate(m2);
4567 }
4568 PAGE_WAKEUP_DONE(m2);
4569
4570 } else {
4571 assert(!m1->compressor);
4572
4573 /*
4574 * completely cleans up the state
4575 * of the page so that it is ready
4576 * to be put onto the free list, or
4577 * for this purpose it looks like it
4578 * just came off of the free list
4579 */
4580 vm_page_free_prepare(m1);
4581 }
4582 #if MACH_ASSERT
4583 stolen_pages++;
4584 #endif
4585 }
4586 m1->pageq.next = (queue_entry_t) m;
4587 m1->pageq.prev = NULL;
4588 m = m1;
4589 }
4590 if (locked_object) {
4591 vm_object_unlock(locked_object);
4592 locked_object = VM_OBJECT_NULL;
4593 }
4594
4595 if (abort_run == TRUE) {
4596 if (m != VM_PAGE_NULL) {
4597 vm_page_free_list(m, FALSE);
4598 }
4599 #if MACH_ASSERT
4600 dumped_run++;
4601 #endif
4602 /*
4603 * want the index of the last
4604 * page in this run that was
4605 * successfully 'stolen', so back
4606 * it up 1 for the auto-decrement on use
4607 * and 1 more to bump back over this page
4608 */
4609 page_idx = tmp_start_idx + 2;
4610 if (page_idx >= vm_pages_count) {
4611 if (wrapped)
4612 goto done_scanning;
4613 page_idx = last_idx = 0;
4614 wrapped = TRUE;
4615 }
4616 abort_run = FALSE;
4617
4618 /*
4619 * We didn't find a contiguous range but we didn't
4620 * start from the very first page.
4621 * Start again from the very first page.
4622 */
4623 RESET_STATE_OF_RUN();
4624
4625 if( flags & KMA_LOMEM)
4626 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4627 else
4628 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4629
4630 last_idx = page_idx;
4631
4632 lck_mtx_lock(&vm_page_queue_free_lock);
4633 /*
4634 * reset our free page limit since we
4635 * dropped the lock protecting the vm_page_free_queue
4636 */
4637 free_available = vm_page_free_count - vm_page_free_reserved;
4638 goto retry;
4639 }
4640
4641 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4642
4643 if (wire == TRUE)
4644 m1->wire_count++;
4645 else
4646 m1->gobbled = TRUE;
4647 }
4648 if (wire == FALSE)
4649 vm_page_gobble_count += npages;
4650
4651 /*
4652 * gobbled pages are also counted as wired pages
4653 */
4654 vm_page_wire_count += npages;
4655
4656 assert(vm_page_verify_contiguous(m, npages));
4657 }
4658 done_scanning:
4659 PAGE_REPLACEMENT_ALLOWED(FALSE);
4660
4661 vm_page_unlock_queues();
4662
4663 #if DEBUG
4664 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4665
4666 tv_end_sec -= tv_start_sec;
4667 if (tv_end_usec < tv_start_usec) {
4668 tv_end_sec--;
4669 tv_end_usec += 1000000;
4670 }
4671 tv_end_usec -= tv_start_usec;
4672 if (tv_end_usec >= 1000000) {
4673 tv_end_sec++;
4674 tv_end_usec -= 1000000;
4675 }
4676 if (vm_page_find_contig_debug) {
4677 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4678 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4679 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4680 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4681 }
4682
4683 #endif
4684 #if MACH_ASSERT
4685 vm_page_verify_free_lists();
4686 #endif
4687 return m;
4688 }
4689
4690 /*
4691 * Allocate a list of contiguous, wired pages.
4692 */
4693 kern_return_t
4694 cpm_allocate(
4695 vm_size_t size,
4696 vm_page_t *list,
4697 ppnum_t max_pnum,
4698 ppnum_t pnum_mask,
4699 boolean_t wire,
4700 int flags)
4701 {
4702 vm_page_t pages;
4703 unsigned int npages;
4704
4705 if (size % PAGE_SIZE != 0)
4706 return KERN_INVALID_ARGUMENT;
4707
4708 npages = (unsigned int) (size / PAGE_SIZE);
4709 if (npages != size / PAGE_SIZE) {
4710 /* 32-bit overflow */
4711 return KERN_INVALID_ARGUMENT;
4712 }
4713
4714 /*
4715 * Obtain a pointer to a subset of the free
4716 * list large enough to satisfy the request;
4717 * the region will be physically contiguous.
4718 */
4719 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4720
4721 if (pages == VM_PAGE_NULL)
4722 return KERN_NO_SPACE;
4723 /*
4724 * determine need for wakeups
4725 */
4726 if ((vm_page_free_count < vm_page_free_min) ||
4727 ((vm_page_free_count < vm_page_free_target) &&
4728 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4729 thread_wakeup((event_t) &vm_page_free_wanted);
4730
4731 VM_CHECK_MEMORYSTATUS;
4732
4733 /*
4734 * The CPM pages should now be available and
4735 * ordered by ascending physical address.
4736 */
4737 assert(vm_page_verify_contiguous(pages, npages));
4738
4739 *list = pages;
4740 return KERN_SUCCESS;
4741 }
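
/*
 * Illustrative sketch only: a hypothetical caller of cpm_allocate().
 * The function and constants used are the ones defined in this file;
 * the example routine itself is not part of the build.
 */
#if 0
static kern_return_t
example_cpm_usage(void)
{
	vm_page_t	pages;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* size: must be a page multiple */
			  &pages,		/* out: list of gobbled/wired pages */
			  0,			/* max_pnum: 0 == no upper bound */
			  0,			/* pnum_mask: no alignment constraint */
			  TRUE,			/* wire the pages */
			  0);			/* flags */
	if (kr != KERN_SUCCESS)
		return kr;			/* e.g. KERN_NO_SPACE */

	/* 'pages' is ordered by ascending physical address */
	return KERN_SUCCESS;
}
#endif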
4742
4743
4744 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4745
4746 /*
4747 * when working on a 'run' of pages, it is necessary to hold
4748 * the vm_page_queue_lock (a hot global lock) for certain operations
4749 * on the page... however, the majority of the work can be done
4750 * while merely holding the object lock... in fact there are certain
4751 * collections of pages that don't require any work brokered by the
4752 * vm_page_queue_lock... to mitigate the time spent behind the global
4753 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4754 * while doing all of the work that doesn't require the vm_page_queue_lock...
4755 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4756 * necessary work for each page... we will grab the busy bit on the page
4757 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4758 * if it can't immediately take the vm_page_queue_lock in order to compete
4759 * for the locks in the same order that vm_pageout_scan takes them.
4760 * the operation names are modeled after the names of the routines that
4761 * need to be called in order to make the changes very obvious in the
4762 * original loop
4763 */
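
/*
 * Illustrative sketch only (not taken from this file): the 2 pass pattern
 * described above, as a hypothetical caller might use it. Only the
 * vm_page_delayed_work structure, the DW_* masks, vm_max_delayed_work_limit
 * and vm_page_do_delayed_work() are real; the surrounding code is a
 * placeholder.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		/* work that only needs the object lock goes here... */

		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
		dwp++;

		if (++dw_count >= (int)vm_max_delayed_work_limit) {
			/* takes the vm_page_queue_lock once for the whole batch */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);

			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
#endif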
4764
4765 void
4766 vm_page_do_delayed_work(
4767 vm_object_t object,
4768 struct vm_page_delayed_work *dwp,
4769 int dw_count)
4770 {
4771 int j;
4772 vm_page_t m;
4773 vm_page_t local_free_q = VM_PAGE_NULL;
4774
4775 /*
4776 * pageout_scan takes the vm_page_lock_queues first
4777 * then tries for the object lock... to avoid what
4778 * is effectively a lock inversion, we'll go to the
4779 * trouble of taking them in that same order... otherwise
4780 * if this object contains the majority of the pages resident
4781 * in the UBC (or a small set of large objects actively being
4782 * worked on contain the majority of the pages), we could
4783 * cause the pageout_scan thread to 'starve' in its attempt
4784 * to find pages to move to the free queue, since it has to
4785 * successfully acquire the object lock of any candidate page
4786 * before it can steal/clean it.
4787 */
4788 if (!vm_page_trylockspin_queues()) {
4789 vm_object_unlock(object);
4790
4791 vm_page_lockspin_queues();
4792
4793 for (j = 0; ; j++) {
4794 if (!vm_object_lock_avoid(object) &&
4795 _vm_object_lock_try(object))
4796 break;
4797 vm_page_unlock_queues();
4798 mutex_pause(j);
4799 vm_page_lockspin_queues();
4800 }
4801 }
4802 for (j = 0; j < dw_count; j++, dwp++) {
4803
4804 m = dwp->dw_m;
4805
4806 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4807 vm_pageout_throttle_up(m);
4808 #if CONFIG_PHANTOM_CACHE
4809 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4810 vm_phantom_cache_update(m);
4811 #endif
4812 if (dwp->dw_mask & DW_vm_page_wire)
4813 vm_page_wire(m);
4814 else if (dwp->dw_mask & DW_vm_page_unwire) {
4815 boolean_t queueit;
4816
4817 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4818
4819 vm_page_unwire(m, queueit);
4820 }
4821 if (dwp->dw_mask & DW_vm_page_free) {
4822 vm_page_free_prepare_queues(m);
4823
4824 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4825 /*
4826 * Add this page to our list of reclaimed pages,
4827 * to be freed later.
4828 */
4829 m->pageq.next = (queue_entry_t) local_free_q;
4830 local_free_q = m;
4831 } else {
4832 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4833 vm_page_deactivate_internal(m, FALSE);
4834 else if (dwp->dw_mask & DW_vm_page_activate) {
4835 if (m->active == FALSE) {
4836 vm_page_activate(m);
4837 }
4838 }
4839 else if (dwp->dw_mask & DW_vm_page_speculate)
4840 vm_page_speculate(m, TRUE);
4841 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4842 /*
4843 * if we didn't hold the object lock and did this,
4844 * we might disconnect the page, then someone might
4845 * soft fault it back in, then we would put it on the
4846 * cleaned queue, and so we would have a referenced (maybe even dirty)
4847 * page on that queue, which we don't want
4848 */
4849 int refmod_state = pmap_disconnect(m->phys_page);
4850
4851 if ((refmod_state & VM_MEM_REFERENCED)) {
4852 /*
4853 * this page has been touched since it got cleaned; let's activate it
4854 * if it hasn't already been
4855 */
4856 vm_pageout_enqueued_cleaned++;
4857 vm_pageout_cleaned_reactivated++;
4858 vm_pageout_cleaned_commit_reactivated++;
4859
4860 if (m->active == FALSE)
4861 vm_page_activate(m);
4862 } else {
4863 m->reference = FALSE;
4864 vm_page_enqueue_cleaned(m);
4865 }
4866 }
4867 else if (dwp->dw_mask & DW_vm_page_lru)
4868 vm_page_lru(m);
4869 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4870 if ( !m->pageout_queue)
4871 VM_PAGE_QUEUES_REMOVE(m);
4872 }
4873 if (dwp->dw_mask & DW_set_reference)
4874 m->reference = TRUE;
4875 else if (dwp->dw_mask & DW_clear_reference)
4876 m->reference = FALSE;
4877
4878 if (dwp->dw_mask & DW_move_page) {
4879 if ( !m->pageout_queue) {
4880 VM_PAGE_QUEUES_REMOVE(m);
4881
4882 assert(m->object != kernel_object);
4883
4884 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4885 }
4886 }
4887 if (dwp->dw_mask & DW_clear_busy)
4888 m->busy = FALSE;
4889
4890 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4891 PAGE_WAKEUP(m);
4892 }
4893 }
4894 vm_page_unlock_queues();
4895
4896 if (local_free_q)
4897 vm_page_free_list(local_free_q, TRUE);
4898
4899 VM_CHECK_MEMORYSTATUS;
4900
4901 }
4902
4903 kern_return_t
4904 vm_page_alloc_list(
4905 int page_count,
4906 int flags,
4907 vm_page_t *list)
4908 {
4909 vm_page_t lo_page_list = VM_PAGE_NULL;
4910 vm_page_t mem;
4911 int i;
4912
4913 if ( !(flags & KMA_LOMEM))
4914 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4915
4916 for (i = 0; i < page_count; i++) {
4917
4918 mem = vm_page_grablo();
4919
4920 if (mem == VM_PAGE_NULL) {
4921 if (lo_page_list)
4922 vm_page_free_list(lo_page_list, FALSE);
4923
4924 *list = VM_PAGE_NULL;
4925
4926 return (KERN_RESOURCE_SHORTAGE);
4927 }
4928 mem->pageq.next = (queue_entry_t) lo_page_list;
4929 lo_page_list = mem;
4930 }
4931 *list = lo_page_list;
4932
4933 return (KERN_SUCCESS);
4934 }
4935
4936 void
4937 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4938 {
4939 page->offset = offset;
4940 }
4941
4942 vm_page_t
4943 vm_page_get_next(vm_page_t page)
4944 {
4945 return ((vm_page_t) page->pageq.next);
4946 }
4947
4948 vm_object_offset_t
4949 vm_page_get_offset(vm_page_t page)
4950 {
4951 return (page->offset);
4952 }
4953
4954 ppnum_t
4955 vm_page_get_phys_page(vm_page_t page)
4956 {
4957 return (page->phys_page);
4958 }
4959
4960
4961 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4962
4963 #if HIBERNATION
4964
4965 static vm_page_t hibernate_gobble_queue;
4966
4967 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4968
4969 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4970 static int hibernate_flush_dirty_pages(int);
4971 static int hibernate_flush_queue(queue_head_t *, int);
4972
4973 void hibernate_flush_wait(void);
4974 void hibernate_mark_in_progress(void);
4975 void hibernate_clear_in_progress(void);
4976
4977 void hibernate_free_range(int, int);
4978 void hibernate_hash_insert_page(vm_page_t);
4979 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4980 void hibernate_rebuild_vm_structs(void);
4981 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4982 ppnum_t hibernate_lookup_paddr(unsigned int);
4983
4984 struct hibernate_statistics {
4985 int hibernate_considered;
4986 int hibernate_reentered_on_q;
4987 int hibernate_found_dirty;
4988 int hibernate_skipped_cleaning;
4989 int hibernate_skipped_transient;
4990 int hibernate_skipped_precious;
4991 int hibernate_skipped_external;
4992 int hibernate_queue_nolock;
4993 int hibernate_queue_paused;
4994 int hibernate_throttled;
4995 int hibernate_throttle_timeout;
4996 int hibernate_drained;
4997 int hibernate_drain_timeout;
4998 int cd_lock_failed;
4999 int cd_found_precious;
5000 int cd_found_wired;
5001 int cd_found_busy;
5002 int cd_found_unusual;
5003 int cd_found_cleaning;
5004 int cd_found_laundry;
5005 int cd_found_dirty;
5006 int cd_found_xpmapped;
5007 int cd_skipped_xpmapped;
5008 int cd_local_free;
5009 int cd_total_free;
5010 int cd_vm_page_wire_count;
5011 int cd_vm_struct_pages_unneeded;
5012 int cd_pages;
5013 int cd_discarded;
5014 int cd_count_wire;
5015 } hibernate_stats;
5016
5017
5018 /*
5019 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5020 * so that we don't overrun the estimated image size, which would
5021 * result in a hibernation failure.
5022 */
5023 #define HIBERNATE_XPMAPPED_LIMIT 40000
5024
5025
5026 static int
5027 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5028 {
5029 wait_result_t wait_result;
5030
5031 vm_page_lock_queues();
5032
5033 while ( !queue_empty(&q->pgo_pending) ) {
5034
5035 q->pgo_draining = TRUE;
5036
5037 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5038
5039 vm_page_unlock_queues();
5040
5041 wait_result = thread_block(THREAD_CONTINUE_NULL);
5042
5043 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5044 hibernate_stats.hibernate_drain_timeout++;
5045
5046 if (q == &vm_pageout_queue_external)
5047 return (0);
5048
5049 return (1);
5050 }
5051 vm_page_lock_queues();
5052
5053 hibernate_stats.hibernate_drained++;
5054 }
5055 vm_page_unlock_queues();
5056
5057 return (0);
5058 }
5059
5060
5061 boolean_t hibernate_skip_external = FALSE;
5062
5063 static int
5064 hibernate_flush_queue(queue_head_t *q, int qcount)
5065 {
5066 vm_page_t m;
5067 vm_object_t l_object = NULL;
5068 vm_object_t m_object = NULL;
5069 int refmod_state = 0;
5070 int try_failed_count = 0;
5071 int retval = 0;
5072 int current_run = 0;
5073 struct vm_pageout_queue *iq;
5074 struct vm_pageout_queue *eq;
5075 struct vm_pageout_queue *tq;
5076
5077
5078 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5079
5080 iq = &vm_pageout_queue_internal;
5081 eq = &vm_pageout_queue_external;
5082
5083 vm_page_lock_queues();
5084
5085 while (qcount && !queue_empty(q)) {
5086
5087 if (current_run++ == 1000) {
5088 if (hibernate_should_abort()) {
5089 retval = 1;
5090 break;
5091 }
5092 current_run = 0;
5093 }
5094
5095 m = (vm_page_t) queue_first(q);
5096 m_object = m->object;
5097
5098 /*
5099 * check to see if we currently are working
5100 * with the same object... if so, we've
5101 * already got the lock
5102 */
5103 if (m_object != l_object) {
5104 /*
5105 * the object associated with candidate page is
5106 * different from the one we were just working
5107 * with... dump the lock if we still own it
5108 */
5109 if (l_object != NULL) {
5110 vm_object_unlock(l_object);
5111 l_object = NULL;
5112 }
5113 /*
5114 * Try to lock object; since we've already got the
5115 * page queues lock, we can only 'try' for this one.
5116 * if the 'try' fails, we need to do a mutex_pause
5117 * to allow the owner of the object lock a chance to
5118 * run...
5119 */
5120 if ( !vm_object_lock_try_scan(m_object)) {
5121
5122 if (try_failed_count > 20) {
5123 hibernate_stats.hibernate_queue_nolock++;
5124
5125 goto reenter_pg_on_q;
5126 }
5127
5128 vm_page_unlock_queues();
5129 mutex_pause(try_failed_count++);
5130 vm_page_lock_queues();
5131
5132 hibernate_stats.hibernate_queue_paused++;
5133 continue;
5134 } else {
5135 l_object = m_object;
5136 }
5137 }
5138 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5139 /*
5140 * page is not to be cleaned
5141 * put it back on the head of its queue
5142 */
5143 if (m->cleaning)
5144 hibernate_stats.hibernate_skipped_cleaning++;
5145 else
5146 hibernate_stats.hibernate_skipped_transient++;
5147
5148 goto reenter_pg_on_q;
5149 }
5150 if (m_object->copy == VM_OBJECT_NULL) {
5151 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5152 /*
5153 * let the normal hibernate image path
5154 * deal with these
5155 */
5156 goto reenter_pg_on_q;
5157 }
5158 }
5159 if ( !m->dirty && m->pmapped) {
5160 refmod_state = pmap_get_refmod(m->phys_page);
5161
5162 if ((refmod_state & VM_MEM_MODIFIED)) {
5163 SET_PAGE_DIRTY(m, FALSE);
5164 }
5165 } else
5166 refmod_state = 0;
5167
5168 if ( !m->dirty) {
5169 /*
5170 * page is not to be cleaned
5171 * put it back on the head of its queue
5172 */
5173 if (m->precious)
5174 hibernate_stats.hibernate_skipped_precious++;
5175
5176 goto reenter_pg_on_q;
5177 }
5178
5179 if (hibernate_skip_external == TRUE && !m_object->internal) {
5180
5181 hibernate_stats.hibernate_skipped_external++;
5182
5183 goto reenter_pg_on_q;
5184 }
5185 tq = NULL;
5186
5187 if (m_object->internal) {
5188 if (VM_PAGE_Q_THROTTLED(iq))
5189 tq = iq;
5190 } else if (VM_PAGE_Q_THROTTLED(eq))
5191 tq = eq;
5192
5193 if (tq != NULL) {
5194 wait_result_t wait_result;
5195 int wait_count = 5;
5196
5197 if (l_object != NULL) {
5198 vm_object_unlock(l_object);
5199 l_object = NULL;
5200 }
5201
5202 while (retval == 0) {
5203
5204 tq->pgo_throttled = TRUE;
5205
5206 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5207
5208 vm_page_unlock_queues();
5209
5210 wait_result = thread_block(THREAD_CONTINUE_NULL);
5211
5212 vm_page_lock_queues();
5213
5214 if (wait_result != THREAD_TIMED_OUT)
5215 break;
5216 if (!VM_PAGE_Q_THROTTLED(tq))
5217 break;
5218
5219 if (hibernate_should_abort())
5220 retval = 1;
5221
5222 if (--wait_count == 0) {
5223
5224 hibernate_stats.hibernate_throttle_timeout++;
5225
5226 if (tq == eq) {
5227 hibernate_skip_external = TRUE;
5228 break;
5229 }
5230 retval = 1;
5231 }
5232 }
5233 if (retval)
5234 break;
5235
5236 hibernate_stats.hibernate_throttled++;
5237
5238 continue;
5239 }
5240 /*
5241 * we've already factored out pages in the laundry which
5242 * means this page can't be on the pageout queue so it's
5243 * safe to do the VM_PAGE_QUEUES_REMOVE
5244 */
5245 assert(!m->pageout_queue);
5246
5247 VM_PAGE_QUEUES_REMOVE(m);
5248
5249 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5250 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5251
5252 vm_pageout_cluster(m, FALSE);
5253
5254 hibernate_stats.hibernate_found_dirty++;
5255
5256 goto next_pg;
5257
5258 reenter_pg_on_q:
5259 queue_remove(q, m, vm_page_t, pageq);
5260 queue_enter(q, m, vm_page_t, pageq);
5261
5262 hibernate_stats.hibernate_reentered_on_q++;
5263 next_pg:
5264 hibernate_stats.hibernate_considered++;
5265
5266 qcount--;
5267 try_failed_count = 0;
5268 }
5269 if (l_object != NULL) {
5270 vm_object_unlock(l_object);
5271 l_object = NULL;
5272 }
5273
5274 vm_page_unlock_queues();
5275
5276 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5277
5278 return (retval);
5279 }
5280
5281
5282 static int
5283 hibernate_flush_dirty_pages(int pass)
5284 {
5285 struct vm_speculative_age_q *aq;
5286 uint32_t i;
5287
5288 if (vm_page_local_q) {
5289 for (i = 0; i < vm_page_local_q_count; i++)
5290 vm_page_reactivate_local(i, TRUE, FALSE);
5291 }
5292
5293 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5294 int qcount;
5295 vm_page_t m;
5296
5297 aq = &vm_page_queue_speculative[i];
5298
5299 if (queue_empty(&aq->age_q))
5300 continue;
5301 qcount = 0;
5302
5303 vm_page_lockspin_queues();
5304
5305 queue_iterate(&aq->age_q,
5306 m,
5307 vm_page_t,
5308 pageq)
5309 {
5310 qcount++;
5311 }
5312 vm_page_unlock_queues();
5313
5314 if (qcount) {
5315 if (hibernate_flush_queue(&aq->age_q, qcount))
5316 return (1);
5317 }
5318 }
5319 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5320 return (1);
5321 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5322 return (1);
5323 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5324 return (1);
5325 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5326 return (1);
5327
5328 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5329 vm_compressor_record_warmup_start();
5330
5331 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5332 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5333 vm_compressor_record_warmup_end();
5334 return (1);
5335 }
5336 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5337 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5338 vm_compressor_record_warmup_end();
5339 return (1);
5340 }
5341 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5342 vm_compressor_record_warmup_end();
5343
5344 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5345 return (1);
5346
5347 return (0);
5348 }
5349
5350
5351 void
5352 hibernate_reset_stats()
5353 {
5354 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5355 }
5356
5357
5358 int
5359 hibernate_flush_memory()
5360 {
5361 int retval;
5362
5363 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5364
5365 hibernate_cleaning_in_progress = TRUE;
5366 hibernate_skip_external = FALSE;
5367
5368 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5369
5370 if (COMPRESSED_PAGER_IS_ACTIVE) {
5371
5372 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5373
5374 vm_compressor_flush();
5375
5376 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5377 }
5378 if (consider_buffer_cache_collect != NULL) {
5379 unsigned int orig_wire_count;
5380
5381 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5382 orig_wire_count = vm_page_wire_count;
5383
5384 (void)(*consider_buffer_cache_collect)(1);
5385 consider_zone_gc(TRUE);
5386
5387 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5388
5389 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5390 }
5391 }
5392 hibernate_cleaning_in_progress = FALSE;
5393
5394 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5395
5396 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5397 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5398
5399
5400 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5401 hibernate_stats.hibernate_considered,
5402 hibernate_stats.hibernate_reentered_on_q,
5403 hibernate_stats.hibernate_found_dirty);
5404 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5405 hibernate_stats.hibernate_skipped_cleaning,
5406 hibernate_stats.hibernate_skipped_transient,
5407 hibernate_stats.hibernate_skipped_precious,
5408 hibernate_stats.hibernate_skipped_external,
5409 hibernate_stats.hibernate_queue_nolock);
5410 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5411 hibernate_stats.hibernate_queue_paused,
5412 hibernate_stats.hibernate_throttled,
5413 hibernate_stats.hibernate_throttle_timeout,
5414 hibernate_stats.hibernate_drained,
5415 hibernate_stats.hibernate_drain_timeout);
5416
5417 return (retval);
5418 }
5419
5420
5421 static void
5422 hibernate_page_list_zero(hibernate_page_list_t *list)
5423 {
5424 uint32_t bank;
5425 hibernate_bitmap_t * bitmap;
5426
5427 bitmap = &list->bank_bitmap[0];
5428 for (bank = 0; bank < list->bank_count; bank++)
5429 {
5430 uint32_t last_bit;
5431
5432 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5433 // set out-of-bound bits at end of bitmap.
5434 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5435 if (last_bit)
5436 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5437
5438 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5439 }
5440 }
5441
5442 void
5443 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5444 {
5445 uint32_t i;
5446 vm_page_t m;
5447 uint64_t start, end, timeout, nsec;
5448 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5449 clock_get_uptime(&start);
5450
5451 for (i = 0; i < gobble_count; i++)
5452 {
5453 while (VM_PAGE_NULL == (m = vm_page_grab()))
5454 {
5455 clock_get_uptime(&end);
5456 if (end >= timeout)
5457 break;
5458 VM_PAGE_WAIT();
5459 }
5460 if (!m)
5461 break;
5462 m->busy = FALSE;
5463 vm_page_gobble(m);
5464
5465 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5466 hibernate_gobble_queue = m;
5467 }
5468
5469 clock_get_uptime(&end);
5470 absolutetime_to_nanoseconds(end - start, &nsec);
5471 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5472 }
5473
5474 void
5475 hibernate_free_gobble_pages(void)
5476 {
5477 vm_page_t m, next;
5478 uint32_t count = 0;
5479
5480 m = (vm_page_t) hibernate_gobble_queue;
5481 while(m)
5482 {
5483 next = (vm_page_t) m->pageq.next;
5484 vm_page_free(m);
5485 count++;
5486 m = next;
5487 }
5488 hibernate_gobble_queue = VM_PAGE_NULL;
5489
5490 if (count)
5491 HIBLOG("Freed %d pages\n", count);
5492 }
5493
5494 static boolean_t
5495 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5496 {
5497 vm_object_t object = NULL;
5498 int refmod_state;
5499 boolean_t discard = FALSE;
5500
5501 do
5502 {
5503 if (m->private)
5504 panic("hibernate_consider_discard: private");
5505
5506 if (!vm_object_lock_try(m->object)) {
5507 if (!preflight) hibernate_stats.cd_lock_failed++;
5508 break;
5509 }
5510 object = m->object;
5511
5512 if (VM_PAGE_WIRED(m)) {
5513 if (!preflight) hibernate_stats.cd_found_wired++;
5514 break;
5515 }
5516 if (m->precious) {
5517 if (!preflight) hibernate_stats.cd_found_precious++;
5518 break;
5519 }
5520 if (m->busy || !object->alive) {
5521 /*
5522 * Somebody is playing with this page.
5523 */
5524 if (!preflight) hibernate_stats.cd_found_busy++;
5525 break;
5526 }
5527 if (m->absent || m->unusual || m->error) {
5528 /*
5529 * If it's unusual in any way, ignore it
5530 */
5531 if (!preflight) hibernate_stats.cd_found_unusual++;
5532 break;
5533 }
5534 if (m->cleaning) {
5535 if (!preflight) hibernate_stats.cd_found_cleaning++;
5536 break;
5537 }
5538 if (m->laundry) {
5539 if (!preflight) hibernate_stats.cd_found_laundry++;
5540 break;
5541 }
5542 if (!m->dirty)
5543 {
5544 refmod_state = pmap_get_refmod(m->phys_page);
5545
5546 if (refmod_state & VM_MEM_REFERENCED)
5547 m->reference = TRUE;
5548 if (refmod_state & VM_MEM_MODIFIED) {
5549 SET_PAGE_DIRTY(m, FALSE);
5550 }
5551 }
5552
5553 /*
5554 * If it's clean or purgeable we can discard the page on wakeup.
5555 */
5556 discard = (!m->dirty)
5557 || (VM_PURGABLE_VOLATILE == object->purgable)
5558 || (VM_PURGABLE_EMPTY == object->purgable);
5559
5560
5561 if (discard == FALSE) {
5562 if (!preflight)
5563 hibernate_stats.cd_found_dirty++;
5564 } else if (m->xpmapped && m->reference && !object->internal) {
5565 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5566 if (!preflight)
5567 hibernate_stats.cd_found_xpmapped++;
5568 discard = FALSE;
5569 } else {
5570 if (!preflight)
5571 hibernate_stats.cd_skipped_xpmapped++;
5572 }
5573 }
5574 }
5575 while (FALSE);
5576
5577 if (object)
5578 vm_object_unlock(object);
5579
5580 return (discard);
5581 }
5582
5583
5584 static void
5585 hibernate_discard_page(vm_page_t m)
5586 {
5587 if (m->absent || m->unusual || m->error)
5588 /*
5589 * If it's unusual in any way, ignore
5590 */
5591 return;
5592
5593 #if MACH_ASSERT || DEBUG
5594 vm_object_t object = m->object;
5595 if (!vm_object_lock_try(m->object))
5596 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5597 #else
5598 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5599 makes sure these locks are uncontended before sleep */
5600 #endif /* MACH_ASSERT || DEBUG */
5601
5602 if (m->pmapped == TRUE)
5603 {
5604 __unused int refmod_state = pmap_disconnect(m->phys_page);
5605 }
5606
5607 if (m->laundry)
5608 panic("hibernate_discard_page(%p) laundry", m);
5609 if (m->private)
5610 panic("hibernate_discard_page(%p) private", m);
5611 if (m->fictitious)
5612 panic("hibernate_discard_page(%p) fictitious", m);
5613
5614 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5615 {
5616 /* object should be on a queue */
5617 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5618 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5619 assert(old_queue);
5620 if (m->object->purgeable_when_ripe) {
5621 vm_purgeable_token_delete_first(old_queue);
5622 }
5623 m->object->purgable = VM_PURGABLE_EMPTY;
5624
5625 /*
5626 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5627 * accounted in the "volatile" ledger, so no change here.
5628 * We have to update vm_page_purgeable_count, though, since we're
5629 * effectively purging this object.
5630 */
5631 unsigned int delta;
5632 assert(m->object->resident_page_count >= m->object->wired_page_count);
5633 delta = (m->object->resident_page_count - m->object->wired_page_count);
5634 assert(vm_page_purgeable_count >= delta);
5635 assert(delta > 0);
5636 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5637 }
5638
5639 vm_page_free(m);
5640
5641 #if MACH_ASSERT || DEBUG
5642 vm_object_unlock(object);
5643 #endif /* MACH_ASSERT || DEBUG */
5644 }
5645
5646 /*
5647 Grab locks for hibernate_page_list_setall()
5648 */
5649 void
5650 hibernate_vm_lock_queues(void)
5651 {
5652 vm_object_lock(compressor_object);
5653 vm_page_lock_queues();
5654 lck_mtx_lock(&vm_page_queue_free_lock);
5655
5656 if (vm_page_local_q) {
5657 uint32_t i;
5658 for (i = 0; i < vm_page_local_q_count; i++) {
5659 struct vpl *lq;
5660 lq = &vm_page_local_q[i].vpl_un.vpl;
5661 VPL_LOCK(&lq->vpl_lock);
5662 }
5663 }
5664 }
5665
5666 void
5667 hibernate_vm_unlock_queues(void)
5668 {
5669 if (vm_page_local_q) {
5670 uint32_t i;
5671 for (i = 0; i < vm_page_local_q_count; i++) {
5672 struct vpl *lq;
5673 lq = &vm_page_local_q[i].vpl_un.vpl;
5674 VPL_UNLOCK(&lq->vpl_lock);
5675 }
5676 }
5677 lck_mtx_unlock(&vm_page_queue_free_lock);
5678 vm_page_unlock_queues();
5679 vm_object_unlock(compressor_object);
5680 }
5681
5682 /*
5683 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5684 pages known to VM to not need saving are subtracted.
5685 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5686 */
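
/*
 * For example (illustration only): a page VM knows does not need to be
 * preserved, with physical page number 'pn', is subtracted by setting its
 * bit in both bitmaps and dropping it from the running counts:
 *
 *	hibernate_page_bitset(page_list, TRUE, pn);
 *	hibernate_page_bitset(page_list_wired, TRUE, pn);
 *	pages--;
 *	count_wire--;
 *
 * This is the pattern applied to the free, local free and gobbled queues
 * in hibernate_page_list_setall() below.
 */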
5687
5688 void
5689 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5690 hibernate_page_list_t * page_list_wired,
5691 hibernate_page_list_t * page_list_pal,
5692 boolean_t preflight,
5693 boolean_t will_discard,
5694 uint32_t * pagesOut)
5695 {
5696 uint64_t start, end, nsec;
5697 vm_page_t m;
5698 vm_page_t next;
5699 uint32_t pages = page_list->page_count;
5700 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5701 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5702 uint32_t count_wire = pages;
5703 uint32_t count_discard_active = 0;
5704 uint32_t count_discard_inactive = 0;
5705 uint32_t count_discard_cleaned = 0;
5706 uint32_t count_discard_purgeable = 0;
5707 uint32_t count_discard_speculative = 0;
5708 uint32_t count_discard_vm_struct_pages = 0;
5709 uint32_t i;
5710 uint32_t bank;
5711 hibernate_bitmap_t * bitmap;
5712 hibernate_bitmap_t * bitmap_wired;
5713 boolean_t discard_all;
5714 boolean_t discard;
5715
5716 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5717
5718 if (preflight) {
5719 page_list = NULL;
5720 page_list_wired = NULL;
5721 page_list_pal = NULL;
5722 discard_all = FALSE;
5723 } else {
5724 discard_all = will_discard;
5725 }
5726
5727 #if MACH_ASSERT || DEBUG
5728 if (!preflight)
5729 {
5730 vm_page_lock_queues();
5731 if (vm_page_local_q) {
5732 for (i = 0; i < vm_page_local_q_count; i++) {
5733 struct vpl *lq;
5734 lq = &vm_page_local_q[i].vpl_un.vpl;
5735 VPL_LOCK(&lq->vpl_lock);
5736 }
5737 }
5738 }
5739 #endif /* MACH_ASSERT || DEBUG */
5740
5741
5742 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5743
5744 clock_get_uptime(&start);
5745
5746 if (!preflight) {
5747 hibernate_page_list_zero(page_list);
5748 hibernate_page_list_zero(page_list_wired);
5749 hibernate_page_list_zero(page_list_pal);
5750
5751 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5752 hibernate_stats.cd_pages = pages;
5753 }
5754
5755 if (vm_page_local_q) {
5756 for (i = 0; i < vm_page_local_q_count; i++)
5757 vm_page_reactivate_local(i, TRUE, !preflight);
5758 }
5759
5760 if (preflight) {
5761 vm_object_lock(compressor_object);
5762 vm_page_lock_queues();
5763 lck_mtx_lock(&vm_page_queue_free_lock);
5764 }
5765
5766 m = (vm_page_t) hibernate_gobble_queue;
5767 while (m)
5768 {
5769 pages--;
5770 count_wire--;
5771 if (!preflight) {
5772 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5773 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5774 }
5775 m = (vm_page_t) m->pageq.next;
5776 }
5777
5778 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5779 {
5780 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5781 {
5782 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5783 {
5784 pages--;
5785 count_wire--;
5786 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5787 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5788
5789 hibernate_stats.cd_local_free++;
5790 hibernate_stats.cd_total_free++;
5791 }
5792 }
5793 }
5794
5795 for( i = 0; i < vm_colors; i++ )
5796 {
5797 queue_iterate(&vm_page_queue_free[i],
5798 m,
5799 vm_page_t,
5800 pageq)
5801 {
5802 pages--;
5803 count_wire--;
5804 if (!preflight) {
5805 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5806 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5807
5808 hibernate_stats.cd_total_free++;
5809 }
5810 }
5811 }
5812
5813 queue_iterate(&vm_lopage_queue_free,
5814 m,
5815 vm_page_t,
5816 pageq)
5817 {
5818 pages--;
5819 count_wire--;
5820 if (!preflight) {
5821 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5822 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5823
5824 hibernate_stats.cd_total_free++;
5825 }
5826 }
5827
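/*
 * Each of the following queue walks follows the same pattern: every page
 * starts out presumed wired (count_wire == pages), so finding it on a
 * pageable queue decrements count_wire and sets its bit in page_list_wired
 * ("no wired save needed").  The page is then either counted for saving
 * or, if the kIOHibernateModeDiscardClean* mode allows and
 * hibernate_consider_discard() agrees, marked in page_list as not needing
 * a save and, when discard_all is set, discarded on the spot.
 */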
5828 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5829 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5830 {
5831 next = (vm_page_t) m->pageq.next;
5832 discard = FALSE;
5833 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5834 && hibernate_consider_discard(m, preflight))
5835 {
5836 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5837 count_discard_inactive++;
5838 discard = discard_all;
5839 }
5840 else
5841 count_throttled++;
5842 count_wire--;
5843 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5844
5845 if (discard) hibernate_discard_page(m);
5846 m = next;
5847 }
5848
5849 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5850 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5851 {
5852 next = (vm_page_t) m->pageq.next;
5853 discard = FALSE;
5854 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5855 && hibernate_consider_discard(m, preflight))
5856 {
5857 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5858 if (m->dirty)
5859 count_discard_purgeable++;
5860 else
5861 count_discard_inactive++;
5862 discard = discard_all;
5863 }
5864 else
5865 count_anonymous++;
5866 count_wire--;
5867 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5868 if (discard) hibernate_discard_page(m);
5869 m = next;
5870 }
5871
5872 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5873 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5874 {
5875 next = (vm_page_t) m->pageq.next;
5876 discard = FALSE;
5877 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5878 && hibernate_consider_discard(m, preflight))
5879 {
5880 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5881 if (m->dirty)
5882 count_discard_purgeable++;
5883 else
5884 count_discard_cleaned++;
5885 discard = discard_all;
5886 }
5887 else
5888 count_cleaned++;
5889 count_wire--;
5890 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5891 if (discard) hibernate_discard_page(m);
5892 m = next;
5893 }
5894
5895 m = (vm_page_t) queue_first(&vm_page_queue_active);
5896 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5897 {
5898 next = (vm_page_t) m->pageq.next;
5899 discard = FALSE;
5900 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5901 && hibernate_consider_discard(m, preflight))
5902 {
5903 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5904 if (m->dirty)
5905 count_discard_purgeable++;
5906 else
5907 count_discard_active++;
5908 discard = discard_all;
5909 }
5910 else
5911 count_active++;
5912 count_wire--;
5913 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5914 if (discard) hibernate_discard_page(m);
5915 m = next;
5916 }
5917
5918 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5919 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5920 {
5921 next = (vm_page_t) m->pageq.next;
5922 discard = FALSE;
5923 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5924 && hibernate_consider_discard(m, preflight))
5925 {
5926 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5927 if (m->dirty)
5928 count_discard_purgeable++;
5929 else
5930 count_discard_inactive++;
5931 discard = discard_all;
5932 }
5933 else
5934 count_inactive++;
5935 count_wire--;
5936 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5937 if (discard) hibernate_discard_page(m);
5938 m = next;
5939 }
5940
5941 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5942 {
5943 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5944 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5945 {
5946 next = (vm_page_t) m->pageq.next;
5947 discard = FALSE;
5948 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5949 && hibernate_consider_discard(m, preflight))
5950 {
5951 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5952 count_discard_speculative++;
5953 discard = discard_all;
5954 }
5955 else
5956 count_speculative++;
5957 count_wire--;
5958 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5959 if (discard) hibernate_discard_page(m);
5960 m = next;
5961 }
5962 }
5963
5964 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5965 {
5966 count_compressor++;
5967 count_wire--;
5968 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5969 }
5970
5971 if (preflight == FALSE && discard_all == TRUE) {
5972 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5973
5974 HIBLOG("hibernate_teardown started\n");
5975 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5976 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5977
5978 pages -= count_discard_vm_struct_pages;
5979 count_wire -= count_discard_vm_struct_pages;
5980
5981 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5982
5983 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5984 }
5985
5986 if (!preflight) {
5987 // pull wired from the pageable bitmap: pages still needing a wired save (bit clear in page_list_wired) are marked as not needing a pageable save
5988 bitmap = &page_list->bank_bitmap[0];
5989 bitmap_wired = &page_list_wired->bank_bitmap[0];
5990 for (bank = 0; bank < page_list->bank_count; bank++)
5991 {
5992 for (i = 0; i < bitmap->bitmapwords; i++)
5993 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5994 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5995 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5996 }
5997 }
5998
5999 // machine dependent adjustments
6000 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6001
6002 if (!preflight) {
6003 hibernate_stats.cd_count_wire = count_wire;
6004 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6005 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6006 }
6007
6008 clock_get_uptime(&end);
6009 absolutetime_to_nanoseconds(end - start, &nsec);
6010 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6011
6012 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6013 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6014 discard_all ? "did" : "could",
6015 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6016
6017 if (hibernate_stats.cd_skipped_xpmapped)
6018 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6019
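/*
 * pagesOut is the caller's estimate of how many pages the image will
 * contain: the pages still presumed saved, minus those counted above as
 * discardable.  When this is a preflight for a hibernation that will
 * discard, the pages counted on the pageable queues and in the compressor
 * are subtracted as well.
 */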
6020 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6021
6022 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6023
6024 #if MACH_ASSERT || DEBUG
6025 if (!preflight)
6026 {
6027 if (vm_page_local_q) {
6028 for (i = 0; i < vm_page_local_q_count; i++) {
6029 struct vpl *lq;
6030 lq = &vm_page_local_q[i].vpl_un.vpl;
6031 VPL_UNLOCK(&lq->vpl_lock);
6032 }
6033 }
6034 vm_page_unlock_queues();
6035 }
6036 #endif /* MACH_ASSERT || DEBUG */
6037
6038 if (preflight) {
6039 lck_mtx_unlock(&vm_page_queue_free_lock);
6040 vm_page_unlock_queues();
6041 vm_object_unlock(compressor_object);
6042 }
6043
6044 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6045 }
6046
6047 void
6048 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6049 {
6050 uint64_t start, end, nsec;
6051 vm_page_t m;
6052 vm_page_t next;
6053 uint32_t i;
6054 uint32_t count_discard_active = 0;
6055 uint32_t count_discard_inactive = 0;
6056 uint32_t count_discard_purgeable = 0;
6057 uint32_t count_discard_cleaned = 0;
6058 uint32_t count_discard_speculative = 0;
6059
6060
6061 #if MACH_ASSERT || DEBUG
6062 vm_page_lock_queues();
6063 if (vm_page_local_q) {
6064 for (i = 0; i < vm_page_local_q_count; i++) {
6065 struct vpl *lq;
6066 lq = &vm_page_local_q[i].vpl_un.vpl;
6067 VPL_LOCK(&lq->vpl_lock);
6068 }
6069 }
6070 #endif /* MACH_ASSERT || DEBUG */
6071
6072 clock_get_uptime(&start);
6073
6074 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6075 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6076 {
6077 next = (vm_page_t) m->pageq.next;
6078 if (hibernate_page_bittst(page_list, m->phys_page))
6079 {
6080 if (m->dirty)
6081 count_discard_purgeable++;
6082 else
6083 count_discard_inactive++;
6084 hibernate_discard_page(m);
6085 }
6086 m = next;
6087 }
6088
6089 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6090 {
6091 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6092 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6093 {
6094 next = (vm_page_t) m->pageq.next;
6095 if (hibernate_page_bittst(page_list, m->phys_page))
6096 {
6097 count_discard_speculative++;
6098 hibernate_discard_page(m);
6099 }
6100 m = next;
6101 }
6102 }
6103
6104 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6105 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6106 {
6107 next = (vm_page_t) m->pageq.next;
6108 if (hibernate_page_bittst(page_list, m->phys_page))
6109 {
6110 if (m->dirty)
6111 count_discard_purgeable++;
6112 else
6113 count_discard_inactive++;
6114 hibernate_discard_page(m);
6115 }
6116 m = next;
6117 }
6118
6119 m = (vm_page_t) queue_first(&vm_page_queue_active);
6120 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6121 {
6122 next = (vm_page_t) m->pageq.next;
6123 if (hibernate_page_bittst(page_list, m->phys_page))
6124 {
6125 if (m->dirty)
6126 count_discard_purgeable++;
6127 else
6128 count_discard_active++;
6129 hibernate_discard_page(m);
6130 }
6131 m = next;
6132 }
6133
6134 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6135 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6136 {
6137 next = (vm_page_t) m->pageq.next;
6138 if (hibernate_page_bittst(page_list, m->phys_page))
6139 {
6140 if (m->dirty)
6141 count_discard_purgeable++;
6142 else
6143 count_discard_cleaned++;
6144 hibernate_discard_page(m);
6145 }
6146 m = next;
6147 }
6148
6149 #if MACH_ASSERT || DEBUG
6150 if (vm_page_local_q) {
6151 for (i = 0; i < vm_page_local_q_count; i++) {
6152 struct vpl *lq;
6153 lq = &vm_page_local_q[i].vpl_un.vpl;
6154 VPL_UNLOCK(&lq->vpl_lock);
6155 }
6156 }
6157 vm_page_unlock_queues();
6158 #endif /* MACH_ASSERT || DEBUG */
6159
6160 clock_get_uptime(&end);
6161 absolutetime_to_nanoseconds(end - start, &nsec);
6162 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6163 nsec / 1000000ULL,
6164 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6165 }
6166
6167 boolean_t hibernate_paddr_map_inited = FALSE;
6168 boolean_t hibernate_rebuild_needed = FALSE;
6169 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6170 vm_page_t hibernate_rebuild_hash_list = NULL;
6171
6172 unsigned int hibernate_teardown_found_tabled_pages = 0;
6173 unsigned int hibernate_teardown_found_created_pages = 0;
6174 unsigned int hibernate_teardown_found_free_pages = 0;
6175 unsigned int hibernate_teardown_vm_page_free_count;
6176
6177
6178 struct ppnum_mapping {
6179 struct ppnum_mapping *ppnm_next;
6180 ppnum_t ppnm_base_paddr;
6181 unsigned int ppnm_sindx;
6182 unsigned int ppnm_eindx;
6183 };
6184
6185 struct ppnum_mapping *ppnm_head;
6186 struct ppnum_mapping *ppnm_last_found = NULL;
6187
6188
6189 void
6190 hibernate_create_paddr_map()
6191 {
6192 unsigned int i;
6193 ppnum_t next_ppnum_in_run = 0;
6194 struct ppnum_mapping *ppnm = NULL;
6195
6196 if (hibernate_paddr_map_inited == FALSE) {
6197
6198 for (i = 0; i < vm_pages_count; i++) {
6199
6200 if (ppnm)
6201 ppnm->ppnm_eindx = i;
6202
6203 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6204
6205 ppnm = kalloc(sizeof(struct ppnum_mapping));
6206
6207 ppnm->ppnm_next = ppnm_head;
6208 ppnm_head = ppnm;
6209
6210 ppnm->ppnm_sindx = i;
6211 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6212 }
6213 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6214 }
6215 ppnm->ppnm_eindx++;
6216
6217 hibernate_paddr_map_inited = TRUE;
6218 }
6219 }
6220
6221 ppnum_t
6222 hibernate_lookup_paddr(unsigned int indx)
6223 {
6224 struct ppnum_mapping *ppnm = NULL;
6225
6226 ppnm = ppnm_last_found;
6227
6228 if (ppnm) {
6229 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6230 goto done;
6231 }
6232 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6233
6234 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6235 ppnm_last_found = ppnm;
6236 break;
6237 }
6238 }
6239 if (ppnm == NULL)
6240 panic("hibernate_lookup_paddr of %d failed\n", indx);
6241 done:
6242 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6243 }
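/*
 * Worked example (hypothetical numbers): suppose vm_pages[] covers two
 * physically contiguous runs, entries 0..9 at ppnum 0x100..0x109 and
 * entries 10..19 at ppnum 0x300..0x309.  hibernate_create_paddr_map()
 * builds one ppnum_mapping per run, so hibernate_lookup_paddr(12) matches
 * the run with sindx 10 / eindx 20 / base 0x300 and returns
 * 0x300 + (12 - 10) == 0x302.
 */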
6244
6245
6246 uint32_t
6247 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6248 {
6249 addr64_t saddr_aligned;
6250 addr64_t eaddr_aligned;
6251 addr64_t addr;
6252 ppnum_t paddr;
6253 unsigned int mark_as_unneeded_pages = 0;
6254
6255 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6256 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6257
6258 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6259
6260 paddr = pmap_find_phys(kernel_pmap, addr);
6261
6262 assert(paddr);
6263
6264 hibernate_page_bitset(page_list, TRUE, paddr);
6265 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6266
6267 mark_as_unneeded_pages++;
6268 }
6269 return (mark_as_unneeded_pages);
6270 }
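/*
 * The range is shrunk inward to whole pages before marking.  With 4KB
 * pages (PAGE_MASK_64 == 0xFFF; example addresses are hypothetical),
 * saddr 0x11234 rounds up to 0x12000 and eaddr 0x15234 rounds down to
 * 0x15000, so the three pages at 0x12000, 0x13000 and 0x14000 are marked
 * unneeded while the partial pages at either end are left alone.
 */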
6271
6272
6273 void
6274 hibernate_hash_insert_page(vm_page_t mem)
6275 {
6276 vm_page_bucket_t *bucket;
6277 int hash_id;
6278
6279 assert(mem->hashed);
6280 assert(mem->object);
6281 assert(mem->offset != (vm_object_offset_t) -1);
6282
6283 /*
6284 * Insert it into the object/offset hash table
6285 */
6286 hash_id = vm_page_hash(mem->object, mem->offset);
6287 bucket = &vm_page_buckets[hash_id];
6288
6289 mem->next_m = bucket->page_list;
6290 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6291 }
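/*
 * Illustrative sketch, not from the original source: the inverse of the
 * insert above, walking a bucket the same way vm_page_info() does.  The
 * helper name is hypothetical; during hibernate teardown/rebuild this code
 * runs single threaded, which is why no bucket lock is shown.
 */
#if 0 /* example only */
static vm_page_t
hibernate_example_hash_lookup(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_bucket_t *bucket = &vm_page_buckets[vm_page_hash(object, offset)];
	vm_page_t m;

	for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL;
	     m = VM_PAGE_UNPACK_PTR(m->next_m)) {
		if (m->object == object && m->offset == offset)
			break;
	}
	return (m);
}
#endif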
6292
6293
6294 void
6295 hibernate_free_range(int sindx, int eindx)
6296 {
6297 vm_page_t mem;
6298 unsigned int color;
6299
6300 while (sindx < eindx) {
6301 mem = &vm_pages[sindx];
6302
6303 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6304
6305 mem->lopage = FALSE;
6306 mem->free = TRUE;
6307
6308 color = mem->phys_page & vm_color_mask;
6309 queue_enter_first(&vm_page_queue_free[color],
6310 mem,
6311 vm_page_t,
6312 pageq);
6313 vm_page_free_count++;
6314
6315 sindx++;
6316 }
6317 }
6318
6319
6320 extern void hibernate_rebuild_pmap_structs(void);
6321
6322 void
6323 hibernate_rebuild_vm_structs(void)
6324 {
6325 int cindx, sindx, eindx;
6326 vm_page_t mem, tmem, mem_next;
6327 AbsoluteTime startTime, endTime;
6328 uint64_t nsec;
6329
6330 if (hibernate_rebuild_needed == FALSE)
6331 return;
6332
6333 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6334 HIBLOG("hibernate_rebuild started\n");
6335
6336 clock_get_uptime(&startTime);
6337
6338 hibernate_rebuild_pmap_structs();
6339
6340 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6341 eindx = vm_pages_count;
6342
6343 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6344
6345 mem = &vm_pages[cindx];
6346 /*
6347 * hibernate_teardown_vm_structs leaves the location where
6348 * this vm_page_t must be restored to in "next_m".
6349 */
6350 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6351 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6352
6353 sindx = (int)(tmem - &vm_pages[0]);
6354
6355 if (mem != tmem) {
6356 /*
6357 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6358 * so move it back to its real location
6359 */
6360 *tmem = *mem;
6361 mem = tmem;
6362 }
6363 if (mem->hashed)
6364 hibernate_hash_insert_page(mem);
6365 /*
6366 * the 'hole' between this vm_page_t and the previous
6367 * vm_page_t we moved needs to be initialized as
6368 * a range of free vm_page_t's
6369 */
6370 hibernate_free_range(sindx + 1, eindx);
6371
6372 eindx = sindx;
6373 }
6374 if (sindx)
6375 hibernate_free_range(0, sindx);
6376
6377 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6378
6379 /*
6380 * process the list of vm_page_t's that were entered in the hash,
6381 * but were not located in the vm_pages array... these are
6382 * vm_page_t's that were created on the fly (i.e. fictitious)
6383 */
6384 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6385 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6386
6387 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6388 hibernate_hash_insert_page(mem);
6389 }
6390 hibernate_rebuild_hash_list = NULL;
6391
6392 clock_get_uptime(&endTime);
6393 SUB_ABSOLUTETIME(&endTime, &startTime);
6394 absolutetime_to_nanoseconds(endTime, &nsec);
6395
6396 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6397
6398 hibernate_rebuild_needed = FALSE;
6399
6400 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6401 }
6402
6403
6404 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6405
6406 uint32_t
6407 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6408 {
6409 unsigned int i;
6410 unsigned int compact_target_indx;
6411 vm_page_t mem, mem_next;
6412 vm_page_bucket_t *bucket;
6413 unsigned int mark_as_unneeded_pages = 0;
6414 unsigned int unneeded_vm_page_bucket_pages = 0;
6415 unsigned int unneeded_vm_pages_pages = 0;
6416 unsigned int unneeded_pmap_pages = 0;
6417 addr64_t start_of_unneeded = 0;
6418 addr64_t end_of_unneeded = 0;
6419
6420
6421 if (hibernate_should_abort())
6422 return (0);
6423
6424 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6425 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6426 vm_page_cleaned_count, compressor_object->resident_page_count);
6427
6428 for (i = 0; i < vm_page_bucket_count; i++) {
6429
6430 bucket = &vm_page_buckets[i];
6431
6432 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6433 assert(mem->hashed);
6434
6435 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6436
6437 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6438 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6439 hibernate_rebuild_hash_list = mem;
6440 }
6441 }
6442 }
6443 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6444 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6445
6446 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6447
6448 compact_target_indx = 0;
6449
6450 for (i = 0; i < vm_pages_count; i++) {
6451
6452 mem = &vm_pages[i];
6453
6454 if (mem->free) {
6455 unsigned int color;
6456
6457 assert(mem->busy);
6458 assert(!mem->lopage);
6459
6460 color = mem->phys_page & vm_color_mask;
6461
6462 queue_remove(&vm_page_queue_free[color],
6463 mem,
6464 vm_page_t,
6465 pageq);
6466 mem->pageq.next = NULL;
6467 mem->pageq.prev = NULL;
6468
6469 vm_page_free_count--;
6470
6471 hibernate_teardown_found_free_pages++;
6472
6473 if ( !vm_pages[compact_target_indx].free)
6474 compact_target_indx = i;
6475 } else {
6476 /*
6477 * record this vm_page_t's original location;
6478 * we need this even if it doesn't get moved,
6479 * so the rebuild function knows it does not
6480 * have to be moved back
6481 */
6482 mem->next_m = VM_PAGE_PACK_PTR(mem);
6483
6484 if (vm_pages[compact_target_indx].free) {
6485 /*
6486 * we've got a hole to fill, so
6487 * move this vm_page_t to its new home
6488 */
6489 vm_pages[compact_target_indx] = *mem;
6490 mem->free = TRUE;
6491
6492 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6493 compact_target_indx++;
6494 } else
6495 hibernate_teardown_last_valid_compact_indx = i;
6496 }
6497 }
6498 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6499 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6500 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6501
6502 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6503
6504 if (start_of_unneeded) {
6505 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6506 mark_as_unneeded_pages += unneeded_pmap_pages;
6507 }
6508 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6509
6510 hibernate_rebuild_needed = TRUE;
6511
6512 return (mark_as_unneeded_pages);
6513 }
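/*
 * Worked example (hypothetical layout): if vm_pages[] is
 * [used, free, used, free, free, used], the compaction loop above removes
 * the free entries from their free queues and slides the later used
 * entries down into the holes, giving [used, used, used, free, free, free]
 * with hibernate_teardown_last_valid_compact_indx == 2.  Each moved entry
 * keeps its original index packed in next_m, which is exactly what
 * hibernate_rebuild_vm_structs() uses to put it back and to re-create the
 * freed ranges afterwards.
 */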
6514
6515
6516 #endif /* HIBERNATION */
6517
6518 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6519
6520 #include <mach_vm_debug.h>
6521 #if MACH_VM_DEBUG
6522
6523 #include <mach_debug/hash_info.h>
6524 #include <vm/vm_debug.h>
6525
6526 /*
6527 * Routine: vm_page_info
6528 * Purpose:
6529 * Return information about the global VP table.
6530 * Fills the buffer with as much information as possible
6531 * and returns the desired size of the buffer.
6532 * Conditions:
6533 * Nothing locked. The caller should provide
6534 * possibly-pageable memory.
6535 */
6536
6537 unsigned int
6538 vm_page_info(
6539 hash_info_bucket_t *info,
6540 unsigned int count)
6541 {
6542 unsigned int i;
6543 lck_spin_t *bucket_lock;
6544
6545 if (vm_page_bucket_count < count)
6546 count = vm_page_bucket_count;
6547
6548 for (i = 0; i < count; i++) {
6549 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6550 unsigned int bucket_count = 0;
6551 vm_page_t m;
6552
6553 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6554 lck_spin_lock(bucket_lock);
6555
6556 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6557 bucket_count++;
6558
6559 lck_spin_unlock(bucket_lock);
6560
6561 /* don't touch pageable memory while holding locks */
6562 info[i].hib_count = bucket_count;
6563 }
6564
6565 return vm_page_bucket_count;
6566 }
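/*
 * Illustrative usage sketch, not from the original source: a debug-only
 * caller sizing and then filling the table.  vm_page_info_example is a
 * hypothetical name; kalloc()/kfree() are the allocators used elsewhere
 * in this file.
 */
#if 0 /* example only */
static void
vm_page_info_example(void)
{
	unsigned int desired, i, populated = 0;
	hash_info_bucket_t *info;

	desired = vm_page_info(NULL, 0);	/* with count 0, just returns the table size */
	info = kalloc(desired * sizeof (*info));
	(void) vm_page_info(info, desired);	/* fill in one hib_count per bucket */

	for (i = 0; i < desired; i++)
		if (info[i].hib_count != 0)
			populated++;

	printf("vm_page_info: %u of %u buckets in use\n", populated, desired);
	kfree(info, desired * sizeof (*info));
}
#endif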
6567 #endif /* MACH_VM_DEBUG */
6568
6569 #if VM_PAGE_BUCKETS_CHECK
6570 void
6571 vm_page_buckets_check(void)
6572 {
6573 unsigned int i;
6574 vm_page_t p;
6575 unsigned int p_hash;
6576 vm_page_bucket_t *bucket;
6577 lck_spin_t *bucket_lock;
6578
6579 if (!vm_page_buckets_check_ready) {
6580 return;
6581 }
6582
6583 #if HIBERNATION
6584 if (hibernate_rebuild_needed ||
6585 hibernate_rebuild_hash_list) {
6586 panic("BUCKET_CHECK: hibernation in progress: "
6587 "rebuild_needed=%d rebuild_hash_list=%p\n",
6588 hibernate_rebuild_needed,
6589 hibernate_rebuild_hash_list);
6590 }
6591 #endif /* HIBERNATION */
6592
6593 #if VM_PAGE_FAKE_BUCKETS
6594 char *cp;
6595 for (cp = (char *) vm_page_fake_buckets_start;
6596 cp < (char *) vm_page_fake_buckets_end;
6597 cp++) {
6598 if (*cp != 0x5a) {
6599 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6600 "[0x%llx:0x%llx]\n",
6601 cp,
6602 (uint64_t) vm_page_fake_buckets_start,
6603 (uint64_t) vm_page_fake_buckets_end);
6604 }
6605 }
6606 #endif /* VM_PAGE_FAKE_BUCKETS */
6607
6608 for (i = 0; i < vm_page_bucket_count; i++) {
6609 bucket = &vm_page_buckets[i];
6610 if (!bucket->page_list) {
6611 continue;
6612 }
6613
6614 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6615 lck_spin_lock(bucket_lock);
6616 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6617 while (p != VM_PAGE_NULL) {
6618 if (!p->hashed) {
6619 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6620 "hash %d in bucket %d at %p "
6621 "is not hashed\n",
6622 p, p->object, p->offset,
6623 p_hash, i, bucket);
6624 }
6625 p_hash = vm_page_hash(p->object, p->offset);
6626 if (p_hash != i) {
6627 panic("BUCKET_CHECK: corruption in bucket %d "
6628 "at %p: page %p object %p offset 0x%llx "
6629 "hash %d\n",
6630 i, bucket, p, p->object, p->offset,
6631 p_hash);
6632 }
6633 p = VM_PAGE_UNPACK_PTR(p->next_m);
6634 }
6635 lck_spin_unlock(bucket_lock);
6636 }
6637
6638 // printf("BUCKET_CHECK: checked buckets\n");
6639 }
6640 #endif /* VM_PAGE_BUCKETS_CHECK */