[apple/xnu.git] / osfmk / vm / vm_resident.c @ commit 2bb8118c24f1a0686c775823881bd06b2b6455f6
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93 #include <vm/vm_compressor.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97 #include <sys/kdebug.h>
98
99 boolean_t hibernate_cleaning_in_progress = FALSE;
100 boolean_t vm_page_free_verify = TRUE;
101
102 uint32_t vm_lopage_free_count = 0;
103 uint32_t vm_lopage_free_limit = 0;
104 uint32_t vm_lopage_lowater = 0;
105 boolean_t vm_lopage_refill = FALSE;
106 boolean_t vm_lopage_needed = FALSE;
107
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 int speculative_age_index = 0;
113 int speculative_steal_index = 0;
114 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
115
116
117 __private_extern__ void vm_page_init_lck_grp(void);
118
119 static void vm_page_free_prepare(vm_page_t page);
120 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
121
122
123
124
125 /*
126 * Associated with page of user-allocatable memory is a
127 * page structure.
128 */
129
130 /*
131 * These variables record the values returned by vm_page_bootstrap,
132 * for debugging purposes. The implementation of pmap_steal_memory
133 * and pmap_startup here also uses them internally.
134 */
135
136 vm_offset_t virtual_space_start;
137 vm_offset_t virtual_space_end;
138 uint32_t vm_page_pages;
139
140 /*
141 * The vm_page_lookup() routine, which provides for fast
142 * (virtual memory object, offset) to page lookup, employs
143 * the following hash table. The vm_page_{insert,remove}
144 * routines install and remove associations in the table.
145 * [This table is often called the virtual-to-physical,
146 * or VP, table.]
147 */
148 typedef struct {
149 vm_page_t pages;
150 #if MACH_PAGE_HASH_STATS
151 int cur_count; /* current count */
152 int hi_count; /* high water mark */
153 #endif /* MACH_PAGE_HASH_STATS */
154 } vm_page_bucket_t;
155
156
157 #define BUCKETS_PER_LOCK 16
158
159 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
160 unsigned int vm_page_bucket_count = 0; /* How big is array? */
161 unsigned int vm_page_hash_mask; /* Mask for hash function */
162 unsigned int vm_page_hash_shift; /* Shift for hash function */
163 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
164 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
165
166 lck_spin_t *vm_page_bucket_locks;
167
168
169 #if MACH_PAGE_HASH_STATS
170 /* This routine is only for debug. It is intended to be called by
171 * hand by a developer using a kernel debugger. This routine prints
172 * out vm_page_hash table statistics to the kernel debug console.
173 */
174 void
175 hash_debug(void)
176 {
177 int i;
178 int numbuckets = 0;
179 int highsum = 0;
180 int maxdepth = 0;
181
182 for (i = 0; i < vm_page_bucket_count; i++) {
183 if (vm_page_buckets[i].hi_count) {
184 numbuckets++;
185 highsum += vm_page_buckets[i].hi_count;
186 if (vm_page_buckets[i].hi_count > maxdepth)
187 maxdepth = vm_page_buckets[i].hi_count;
188 }
189 }
190 printf("Total number of buckets: %d\n", vm_page_bucket_count);
191 printf("Number used buckets: %d = %d%%\n",
192 numbuckets, 100*numbuckets/vm_page_bucket_count);
193 printf("Number unused buckets: %d = %d%%\n",
194 vm_page_bucket_count - numbuckets,
195 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
196 printf("Sum of bucket max depth: %d\n", highsum);
197 printf("Average bucket depth: %d.%2d\n",
198 highsum/vm_page_bucket_count,
199 highsum%vm_page_bucket_count);
200 printf("Maximum bucket depth: %d\n", maxdepth);
201 }
202 #endif /* MACH_PAGE_HASH_STATS */
203
204 /*
205 * The virtual page size is currently implemented as a runtime
206 * variable, but is constant once initialized using vm_set_page_size.
207 * This initialization must be done in the machine-dependent
208 * bootstrap sequence, before calling other machine-independent
209 * initializations.
210 *
211 * All references to the virtual page size outside this
212 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
213 * constants.
214 */
215 vm_size_t page_size = PAGE_SIZE;
216 vm_size_t page_mask = PAGE_MASK;
217 int page_shift = PAGE_SHIFT;
218
219 /*
220 * Resident page structures are initialized from
221 * a template (see vm_page_alloc).
222 *
223  *	When adding a new field to the resident page (vm_page)
224  *	structure, be sure to add its initialization
225 * (see vm_page_bootstrap).
226 */
227 struct vm_page vm_page_template;
228
229 vm_page_t vm_pages = VM_PAGE_NULL;
230 unsigned int vm_pages_count = 0;
231 ppnum_t vm_page_lowest = 0;
232
233 /*
234 * Resident pages that represent real memory
235 * are allocated from a set of free lists,
236 * one per color.
237 */
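/*
 * (A page's "color" is, roughly, the set of physical pages that
 * compete for the same cache lines; it is derived from the physical
 * page number masked with vm_color_mask.  Keeping one free list per
 * color lets the allocator rotate through colors so that consecutive
 * allocations are spread across the cache.)
 */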
238 unsigned int vm_colors;
239 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
240 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
241 queue_head_t vm_page_queue_free[MAX_COLORS];
242 unsigned int vm_page_free_wanted;
243 unsigned int vm_page_free_wanted_privileged;
244 unsigned int vm_page_free_count;
245 unsigned int vm_page_fictitious_count;
246
247 unsigned int vm_page_free_count_minimum; /* debugging */
248
249 /*
250 * Occasionally, the virtual memory system uses
251 * resident page structures that do not refer to
252 * real pages, for example to leave a page with
253 * important state information in the VP table.
254 *
255 * These page structures are allocated the way
256 * most other kernel structures are.
257 */
258 zone_t vm_page_zone;
259 vm_locks_array_t vm_page_locks;
260 decl_lck_mtx_data(,vm_page_alloc_lock)
261 lck_mtx_ext_t vm_page_alloc_lock_ext;
262
263 unsigned int io_throttle_zero_fill;
264
265 unsigned int vm_page_local_q_count = 0;
266 unsigned int vm_page_local_q_soft_limit = 250;
267 unsigned int vm_page_local_q_hard_limit = 500;
268 struct vplq *vm_page_local_q = NULL;
269
270 /* N.B. Guard and fictitious pages must not
271 * be assigned a zero phys_page value.
272 */
273 /*
274 * Fictitious pages don't have a physical address,
275 * but we must initialize phys_page to something.
276 * For debugging, this should be a strange value
277 * that the pmap module can recognize in assertions.
278 */
279 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
280
281 /*
282 * Guard pages are not accessible so they don't
283 * need a physical address, but we need to enter
284 * one in the pmap.
285 * Let's make it recognizable and make sure that
286 * we don't use a real physical page with that
287 * physical address.
288 */
289 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
290
291 /*
292 * Resident page structures are also chained on
293 * queues that are used by the page replacement
294 * system (pageout daemon). These queues are
295 * defined here, but are shared by the pageout
296 * module. The inactive queue is broken into
297  *	file-backed and anonymous queues for convenience, as the
298  *	pageout daemon often assigns a higher
299  *	importance to anonymous pages (they are less likely to be picked).
300 */
301 queue_head_t vm_page_queue_active;
302 queue_head_t vm_page_queue_inactive;
303 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
304 queue_head_t vm_page_queue_throttled;
305
306 unsigned int vm_page_active_count;
307 unsigned int vm_page_inactive_count;
308 unsigned int vm_page_anonymous_count;
309 unsigned int vm_page_throttled_count;
310 unsigned int vm_page_speculative_count;
311 unsigned int vm_page_wire_count;
312 unsigned int vm_page_wire_count_initial;
313 unsigned int vm_page_gobble_count = 0;
314 unsigned int vm_page_wire_count_warning = 0;
315 unsigned int vm_page_gobble_count_warning = 0;
316
317 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
318 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
319 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
320
321 unsigned int vm_page_external_count = 0;
322 unsigned int vm_page_internal_count = 0;
323 unsigned int vm_page_pageable_external_count = 0;
324 unsigned int vm_page_pageable_internal_count = 0;
325
326 #if DEVELOPMENT || DEBUG
327 unsigned int vm_page_speculative_recreated = 0;
328 unsigned int vm_page_speculative_created = 0;
329 unsigned int vm_page_speculative_used = 0;
330 #endif
331
332 queue_head_t vm_page_queue_cleaned;
333
334 unsigned int vm_page_cleaned_count = 0;
335 unsigned int vm_pageout_enqueued_cleaned = 0;
336
337 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
338 ppnum_t max_valid_low_ppnum = 0xffffffff;
339
340
341 /*
342 * Several page replacement parameters are also
343 * shared with this module, so that page allocation
344 * (done here in vm_page_alloc) can trigger the
345 * pageout daemon.
346 */
347 unsigned int vm_page_free_target = 0;
348 unsigned int vm_page_free_min = 0;
349 unsigned int vm_page_throttle_limit = 0;
350 uint32_t vm_page_creation_throttle = 0;
351 unsigned int vm_page_inactive_target = 0;
352 unsigned int vm_page_anonymous_min = 0;
353 unsigned int vm_page_inactive_min = 0;
354 unsigned int vm_page_free_reserved = 0;
355 unsigned int vm_page_throttle_count = 0;
356
357
358 /*
359 * The VM system has a couple of heuristics for deciding
360 * that pages are "uninteresting" and should be placed
361 * on the inactive queue as likely candidates for replacement.
362 * These variables let the heuristics be controlled at run-time
363 * to make experimentation easier.
364 */
365
366 boolean_t vm_page_deactivate_hint = TRUE;
367
368 struct vm_page_stats_reusable vm_page_stats_reusable;
369
370 /*
371 * vm_set_page_size:
372 *
373 * Sets the page size, perhaps based upon the memory
374 * size. Must be called before any use of page-size
375 * dependent functions.
376 *
377 * Sets page_shift and page_mask from page_size.
378 */
379 void
380 vm_set_page_size(void)
381 {
382 page_mask = page_size - 1;
383
384 if ((page_mask & page_size) != 0)
385 panic("vm_set_page_size: page size not a power of two");
386
387 for (page_shift = 0; ; page_shift++)
388 if ((1U << page_shift) == page_size)
389 break;
390 }
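/*
 * For example, with PAGE_SIZE = 4096 this leaves page_mask = 0xFFF
 * and the loop settles on page_shift = 12, since 1 << 12 == 4096.
 */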
391
392
393 /*	Called once during startup, once the cache geometry is known.
394 */
395 static void
396 vm_page_set_colors( void )
397 {
398 unsigned int n, override;
399
400 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
401 n = override;
402 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
403 n = vm_cache_geometry_colors;
404 else n = DEFAULT_COLORS; /* use default if all else fails */
405
406 if ( n == 0 )
407 n = 1;
408 if ( n > MAX_COLORS )
409 n = MAX_COLORS;
410
411 /* the count must be a power of 2 */
412 if ( ( n & (n - 1)) != 0 )
413 panic("vm_page_set_colors");
414
415 vm_colors = n;
416 vm_color_mask = n - 1;
417 }
418
419
420 lck_grp_t vm_page_lck_grp_free;
421 lck_grp_t vm_page_lck_grp_queue;
422 lck_grp_t vm_page_lck_grp_local;
423 lck_grp_t vm_page_lck_grp_purge;
424 lck_grp_t vm_page_lck_grp_alloc;
425 lck_grp_t vm_page_lck_grp_bucket;
426 lck_grp_attr_t vm_page_lck_grp_attr;
427 lck_attr_t vm_page_lck_attr;
428
429
430 __private_extern__ void
431 vm_page_init_lck_grp(void)
432 {
433 /*
434 	 * initialize the vm_page lock world
435 */
436 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
437 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
438 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
439 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
440 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
441 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
442 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
443 lck_attr_setdefault(&vm_page_lck_attr);
444 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
445
446 vm_compressor_init_locks();
447 }
448
449 void
450 vm_page_init_local_q()
451 {
452 unsigned int num_cpus;
453 unsigned int i;
454 struct vplq *t_local_q;
455
456 num_cpus = ml_get_max_cpus();
457
458 /*
459 * no point in this for a uni-processor system
460 */
461 if (num_cpus >= 2) {
462 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
463
464 for (i = 0; i < num_cpus; i++) {
465 struct vpl *lq;
466
467 lq = &t_local_q[i].vpl_un.vpl;
468 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
469 queue_init(&lq->vpl_queue);
470 lq->vpl_count = 0;
471 lq->vpl_internal_count = 0;
472 lq->vpl_external_count = 0;
473 }
474 vm_page_local_q_count = num_cpus;
475
476 vm_page_local_q = (struct vplq *)t_local_q;
477 }
478 }
479
480
481 /*
482 * vm_page_bootstrap:
483 *
484 * Initializes the resident memory module.
485 *
486 * Allocates memory for the page cells, and
487 * for the object/offset-to-page hash table headers.
488 * Each page cell is initialized and placed on the free list.
489 * Returns the range of available kernel virtual memory.
490 */
491
492 void
493 vm_page_bootstrap(
494 vm_offset_t *startp,
495 vm_offset_t *endp)
496 {
497 register vm_page_t m;
498 unsigned int i;
499 unsigned int log1;
500 unsigned int log2;
501 unsigned int size;
502
503 /*
504 * Initialize the vm_page template.
505 */
506
507 m = &vm_page_template;
508 bzero(m, sizeof (*m));
509
510 m->pageq.next = NULL;
511 m->pageq.prev = NULL;
512 m->listq.next = NULL;
513 m->listq.prev = NULL;
514 m->next = VM_PAGE_NULL;
515
516 m->object = VM_OBJECT_NULL; /* reset later */
517 m->offset = (vm_object_offset_t) -1; /* reset later */
518
519 m->wire_count = 0;
520 m->local = FALSE;
521 m->inactive = FALSE;
522 m->active = FALSE;
523 m->pageout_queue = FALSE;
524 m->speculative = FALSE;
525 m->laundry = FALSE;
526 m->free = FALSE;
527 m->reference = FALSE;
528 m->gobbled = FALSE;
529 m->private = FALSE;
530 m->throttled = FALSE;
531 m->__unused_pageq_bits = 0;
532
533 m->phys_page = 0; /* reset later */
534
535 m->busy = TRUE;
536 m->wanted = FALSE;
537 m->tabled = FALSE;
538 m->fictitious = FALSE;
539 m->pmapped = FALSE;
540 m->wpmapped = FALSE;
541 m->pageout = FALSE;
542 m->absent = FALSE;
543 m->error = FALSE;
544 m->dirty = FALSE;
545 m->cleaning = FALSE;
546 m->precious = FALSE;
547 m->clustered = FALSE;
548 m->overwriting = FALSE;
549 m->restart = FALSE;
550 m->unusual = FALSE;
551 m->encrypted = FALSE;
552 m->encrypted_cleaning = FALSE;
553 m->cs_validated = FALSE;
554 m->cs_tainted = FALSE;
555 m->no_cache = FALSE;
556 m->reusable = FALSE;
557 m->slid = FALSE;
558 m->was_dirty = FALSE;
559 m->xpmapped = FALSE;
560 m->compressor = FALSE;
561 m->__unused_object_bits = 0;
562
563 /*
564 * Initialize the page queues.
565 */
566 vm_page_init_lck_grp();
567
568 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
569 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
570 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
571
572 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
573 int group;
574
575 purgeable_queues[i].token_q_head = 0;
576 purgeable_queues[i].token_q_tail = 0;
577 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
578 queue_init(&purgeable_queues[i].objq[group]);
579
580 purgeable_queues[i].type = i;
581 purgeable_queues[i].new_pages = 0;
582 #if MACH_ASSERT
583 purgeable_queues[i].debug_count_tokens = 0;
584 purgeable_queues[i].debug_count_objects = 0;
585 #endif
586 };
587
588 for (i = 0; i < MAX_COLORS; i++ )
589 queue_init(&vm_page_queue_free[i]);
590
591 queue_init(&vm_lopage_queue_free);
592 queue_init(&vm_page_queue_active);
593 queue_init(&vm_page_queue_inactive);
594 queue_init(&vm_page_queue_cleaned);
595 queue_init(&vm_page_queue_throttled);
596 queue_init(&vm_page_queue_anonymous);
597
598 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
599 queue_init(&vm_page_queue_speculative[i].age_q);
600
601 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
602 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
603 }
604 vm_page_free_wanted = 0;
605 vm_page_free_wanted_privileged = 0;
606
607 vm_page_set_colors();
608
609
610 /*
611 * Steal memory for the map and zone subsystems.
612 */
613 zone_steal_memory();
614 vm_map_steal_memory();
615
616 /*
617 * Allocate (and initialize) the virtual-to-physical
618 * table hash buckets.
619 *
620 * The number of buckets should be a power of two to
621 * get a good hash function. The following computation
622 	 *	chooses the first power of two that is greater than
623 	 *	or equal to the number of physical pages in the system.
624 */
625
626 if (vm_page_bucket_count == 0) {
627 unsigned int npages = pmap_free_pages();
628
629 vm_page_bucket_count = 1;
630 while (vm_page_bucket_count < npages)
631 vm_page_bucket_count <<= 1;
632 }
633 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
634
635 vm_page_hash_mask = vm_page_bucket_count - 1;
636
637 /*
638 * Calculate object shift value for hashing algorithm:
639 * O = log2(sizeof(struct vm_object))
640 * B = log2(vm_page_bucket_count)
641 * hash shifts the object left by
642 * B/2 - O
643 */
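/*
 * Illustrative numbers only (the structure size here is an assumption,
 * not the real sizeof): with 2^18 buckets the first loop below yields
 * log1 = 18, and a 256-byte struct vm_object would make the second
 * loop yield log2 = 8, giving vm_page_hash_shift = 18/2 - 8 + 1 = 2.
 */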
644 size = vm_page_bucket_count;
645 for (log1 = 0; size > 1; log1++)
646 size /= 2;
647 size = sizeof(struct vm_object);
648 for (log2 = 0; size > 1; log2++)
649 size /= 2;
650 vm_page_hash_shift = log1/2 - log2 + 1;
651
652 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
653 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
654 	vm_page_bucket_hash |= 1;				/* Set bit 0 - must always be 1 to ensure a unique series */
655
656 if (vm_page_hash_mask & vm_page_bucket_count)
657 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
658
659 vm_page_buckets = (vm_page_bucket_t *)
660 pmap_steal_memory(vm_page_bucket_count *
661 sizeof(vm_page_bucket_t));
662
663 vm_page_bucket_locks = (lck_spin_t *)
664 pmap_steal_memory(vm_page_bucket_lock_count *
665 sizeof(lck_spin_t));
666
667 for (i = 0; i < vm_page_bucket_count; i++) {
668 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
669
670 bucket->pages = VM_PAGE_NULL;
671 #if MACH_PAGE_HASH_STATS
672 bucket->cur_count = 0;
673 bucket->hi_count = 0;
674 #endif /* MACH_PAGE_HASH_STATS */
675 }
676
677 for (i = 0; i < vm_page_bucket_lock_count; i++)
678 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
679
680 /*
681 * Machine-dependent code allocates the resident page table.
682 * It uses vm_page_init to initialize the page frames.
683 * The code also returns to us the virtual space available
684 * to the kernel. We don't trust the pmap module
685 * to get the alignment right.
686 */
687
688 pmap_startup(&virtual_space_start, &virtual_space_end);
689 virtual_space_start = round_page(virtual_space_start);
690 virtual_space_end = trunc_page(virtual_space_end);
691
692 *startp = virtual_space_start;
693 *endp = virtual_space_end;
694
695 /*
696 * Compute the initial "wire" count.
697 * Up until now, the pages which have been set aside are not under
698 * the VM system's control, so although they aren't explicitly
699 * wired, they nonetheless can't be moved. At this moment,
700 * all VM managed pages are "free", courtesy of pmap_startup.
701 */
702 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
703 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
704 vm_page_wire_count_initial = vm_page_wire_count;
705 vm_page_free_count_minimum = vm_page_free_count;
706
707 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
708 vm_page_free_count, vm_page_wire_count);
709
710 simple_lock_init(&vm_paging_lock, 0);
711 }
712
713 #ifndef MACHINE_PAGES
714 /*
715 * We implement pmap_steal_memory and pmap_startup with the help
716 * of two simpler functions, pmap_virtual_space and pmap_next_page.
717 */
718
719 void *
720 pmap_steal_memory(
721 vm_size_t size)
722 {
723 vm_offset_t addr, vaddr;
724 ppnum_t phys_page;
725
726 /*
727 	 *	Round the requested size up to a multiple of the pointer size.
728 */
729
730 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
731
732 /*
733 * If this is the first call to pmap_steal_memory,
734 	 *	we have to initialize ourselves.
735 */
736
737 if (virtual_space_start == virtual_space_end) {
738 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
739
740 /*
741 * The initial values must be aligned properly, and
742 * we don't trust the pmap module to do it right.
743 */
744
745 virtual_space_start = round_page(virtual_space_start);
746 virtual_space_end = trunc_page(virtual_space_end);
747 }
748
749 /*
750 * Allocate virtual memory for this request.
751 */
752
753 addr = virtual_space_start;
754 virtual_space_start += size;
755
756 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
757
758 /*
759 * Allocate and map physical pages to back new virtual pages.
760 */
761
762 for (vaddr = round_page(addr);
763 vaddr < addr + size;
764 vaddr += PAGE_SIZE) {
765
766 if (!pmap_next_page_hi(&phys_page))
767 panic("pmap_steal_memory");
768
769 /*
770 * XXX Logically, these mappings should be wired,
771 * but some pmap modules barf if they are.
772 */
773 #if defined(__LP64__)
774 pmap_pre_expand(kernel_pmap, vaddr);
775 #endif
776
777 pmap_enter(kernel_pmap, vaddr, phys_page,
778 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
779 VM_WIMG_USE_DEFAULT, FALSE);
780 /*
781 * Account for newly stolen memory
782 */
783 vm_page_wire_count++;
784
785 }
786
787 return (void *) addr;
788 }
789
790 void
791 pmap_startup(
792 vm_offset_t *startp,
793 vm_offset_t *endp)
794 {
795 unsigned int i, npages, pages_initialized, fill, fillval;
796 ppnum_t phys_page;
797 addr64_t tmpaddr;
798
799 /*
800 * We calculate how many page frames we will have
801 * and then allocate the page structures in one chunk.
802 */
803
804 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
805 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
806 	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Each managed page costs PAGE_SIZE of memory plus one struct vm_page of bookkeeping, so divide by their sum */
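	/*
	 * Illustrative arithmetic (the struct size is an assumption): with
	 * 1 GiB of memory left, 4 KiB pages and a struct vm_page of about
	 * 100 bytes, npages is roughly 2^30 / 4196, i.e. ~255,000 entries,
	 * a bit fewer than the 262,144 raw frames because the vm_page array
	 * itself is carved out of the same memory.
	 */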
807
808 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
809
810 /*
811 * Initialize the page frames.
812 */
813 for (i = 0, pages_initialized = 0; i < npages; i++) {
814 if (!pmap_next_page(&phys_page))
815 break;
816 if (pages_initialized == 0 || phys_page < vm_page_lowest)
817 vm_page_lowest = phys_page;
818
819 vm_page_init(&vm_pages[i], phys_page, FALSE);
820 vm_page_pages++;
821 pages_initialized++;
822 }
823 vm_pages_count = pages_initialized;
824
825 /*
826 * Check if we want to initialize pages to a known value
827 */
828 fill = 0; /* Assume no fill */
829 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
830 #if DEBUG
831 /* This slows down booting the DEBUG kernel, particularly on
832 * large memory systems, but is worthwhile in deterministically
833 * trapping uninitialized memory usage.
834 */
835 if (fill == 0) {
836 fill = 1;
837 fillval = 0xDEB8F177;
838 }
839 #endif
840 if (fill)
841 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
842 // -debug code remove
843 if (2 == vm_himemory_mode) {
844 // free low -> high so high is preferred
845 for (i = 1; i <= pages_initialized; i++) {
846 			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
847 vm_page_release(&vm_pages[i - 1]);
848 }
849 }
850 else
851 // debug code remove-
852
853 /*
854 * Release pages in reverse order so that physical pages
855 * initially get allocated in ascending addresses. This keeps
856 * the devices (which must address physical memory) happy if
857 * they require several consecutive pages.
858 */
859 for (i = pages_initialized; i > 0; i--) {
860 		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
861 vm_page_release(&vm_pages[i - 1]);
862 }
863
864 #if 0
865 {
866 vm_page_t xx, xxo, xxl;
867 int i, j, k, l;
868
869 j = 0; /* (BRINGUP) */
870 xxl = 0;
871
872 for( i = 0; i < vm_colors; i++ ) {
873 queue_iterate(&vm_page_queue_free[i],
874 xx,
875 vm_page_t,
876 pageq) { /* BRINGUP */
877 j++; /* (BRINGUP) */
878 if(j > vm_page_free_count) { /* (BRINGUP) */
879 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
880 }
881
882 l = vm_page_free_count - j; /* (BRINGUP) */
883 k = 0; /* (BRINGUP) */
884
885 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
886
887 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
888 k++;
889 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
890 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
891 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
892 }
893 }
894
895 xxl = xx;
896 }
897 }
898
899 if(j != vm_page_free_count) { /* (BRINGUP) */
900 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
901 }
902 }
903 #endif
904
905
906 /*
907 * We have to re-align virtual_space_start,
908 * because pmap_steal_memory has been using it.
909 */
910
911 virtual_space_start = round_page(virtual_space_start);
912
913 *startp = virtual_space_start;
914 *endp = virtual_space_end;
915 }
916 #endif /* MACHINE_PAGES */
917
918 /*
919 * Routine: vm_page_module_init
920 * Purpose:
921 * Second initialization pass, to be done after
922 * the basic VM system is ready.
923 */
924 void
925 vm_page_module_init(void)
926 {
927 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
928 0, PAGE_SIZE, "vm pages");
929
930 #if ZONE_DEBUG
931 zone_debug_disable(vm_page_zone);
932 #endif /* ZONE_DEBUG */
933
934 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
935 zone_change(vm_page_zone, Z_EXPAND, FALSE);
936 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
937 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
938 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
939 /*
940 * Adjust zone statistics to account for the real pages allocated
941 * in vm_page_create(). [Q: is this really what we want?]
942 */
943 vm_page_zone->count += vm_page_pages;
944 vm_page_zone->sum_count += vm_page_pages;
945 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
946 }
947
948 /*
949 * Routine: vm_page_create
950 * Purpose:
951 * After the VM system is up, machine-dependent code
952 * may stumble across more physical memory. For example,
953 * memory that it was reserving for a frame buffer.
954 * vm_page_create turns this memory into available pages.
955 */
956
957 void
958 vm_page_create(
959 ppnum_t start,
960 ppnum_t end)
961 {
962 ppnum_t phys_page;
963 vm_page_t m;
964
965 for (phys_page = start;
966 phys_page < end;
967 phys_page++) {
968 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
969 == VM_PAGE_NULL)
970 vm_page_more_fictitious();
971
972 m->fictitious = FALSE;
973 pmap_clear_noencrypt(phys_page);
974
975 vm_page_pages++;
976 vm_page_release(m);
977 }
978 }
979
980 /*
981 * vm_page_hash:
982 *
983 * Distributes the object/offset key pair among hash buckets.
984 *
985 * NOTE: The bucket count must be a power of 2
986 */
987 #define vm_page_hash(object, offset) (\
988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
989 & vm_page_hash_mask)
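/*
 * The trailing "& vm_page_hash_mask" only yields a well-distributed
 * bucket index because vm_page_bucket_count is a power of two, so the
 * mask (vm_page_bucket_count - 1) keeps every low-order bit of the
 * mixed key instead of biasing toward some buckets.
 */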
990
991
992 /*
993 * vm_page_insert: [ internal use only ]
994 *
995 * Inserts the given mem entry into the object/object-page
996 * table and object list.
997 *
998 * The object must be locked.
999 */
1000 void
1001 vm_page_insert(
1002 vm_page_t mem,
1003 vm_object_t object,
1004 vm_object_offset_t offset)
1005 {
1006 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1007 }
1008
1009 void
1010 vm_page_insert_internal(
1011 vm_page_t mem,
1012 vm_object_t object,
1013 vm_object_offset_t offset,
1014 boolean_t queues_lock_held,
1015 boolean_t insert_in_hash,
1016 boolean_t batch_pmap_op)
1017 {
1018 vm_page_bucket_t *bucket;
1019 lck_spin_t *bucket_lock;
1020 int hash_id;
1021
1022 XPR(XPR_VM_PAGE,
1023 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1024 object, offset, mem, 0,0);
1025 #if 0
1026 /*
1027 * we may not hold the page queue lock
1028 * so this check isn't safe to make
1029 */
1030 VM_PAGE_CHECK(mem);
1031 #endif
1032
1033 assert(page_aligned(offset));
1034
1035 if (object == vm_submap_object) {
1036 /* the vm_submap_object is only a placeholder for submaps */
1037 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1038 }
1039
1040 vm_object_lock_assert_exclusive(object);
1041 #if DEBUG
1042 lck_mtx_assert(&vm_page_queue_lock,
1043 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1044 : LCK_MTX_ASSERT_NOTOWNED);
1045 #endif /* DEBUG */
1046
1047 if (insert_in_hash == TRUE) {
1048 #if DEBUG
1049 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1050 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1051 "already in (obj=%p,off=0x%llx)",
1052 mem, object, offset, mem->object, mem->offset);
1053 #endif
1054 assert(!object->internal || offset < object->vo_size);
1055
1056 /* only insert "pageout" pages into "pageout" objects,
1057 * and normal pages into normal objects */
1058 assert(object->pageout == mem->pageout);
1059
1060 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1061
1062 /*
1063 * Record the object/offset pair in this page
1064 */
1065
1066 mem->object = object;
1067 mem->offset = offset;
1068
1069 /*
1070 		 *	Insert it into the object/offset hash table
1071 */
1072 hash_id = vm_page_hash(object, offset);
1073 bucket = &vm_page_buckets[hash_id];
1074 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1075
1076 lck_spin_lock(bucket_lock);
1077
1078 mem->next = bucket->pages;
1079 bucket->pages = mem;
1080 #if MACH_PAGE_HASH_STATS
1081 if (++bucket->cur_count > bucket->hi_count)
1082 bucket->hi_count = bucket->cur_count;
1083 #endif /* MACH_PAGE_HASH_STATS */
1084
1085 lck_spin_unlock(bucket_lock);
1086 }
1087
1088 {
1089 unsigned int cache_attr;
1090
1091 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1092
1093 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1094 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1095 }
1096 }
1097 /*
1098 * Now link into the object's list of backed pages.
1099 */
1100 VM_PAGE_INSERT(mem, object);
1101 mem->tabled = TRUE;
1102
1103 /*
1104 * Show that the object has one more resident page.
1105 */
1106
1107 object->resident_page_count++;
1108 if (VM_PAGE_WIRED(mem)) {
1109 object->wired_page_count++;
1110 }
1111 assert(object->resident_page_count >= object->wired_page_count);
1112
1113 if (object->internal) {
1114 OSAddAtomic(1, &vm_page_internal_count);
1115 } else {
1116 OSAddAtomic(1, &vm_page_external_count);
1117 }
1118
1119 /*
1120 * It wouldn't make sense to insert a "reusable" page in
1121 * an object (the page would have been marked "reusable" only
1122 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1123 * in the object at that time).
1124 	 * But a page could be inserted in an "all_reusable" object, if
1125 * something faults it in (a vm_read() from another task or a
1126 * "use-after-free" issue in user space, for example). It can
1127 * also happen if we're relocating a page from that object to
1128 * a different physical page during a physically-contiguous
1129 * allocation.
1130 */
1131 assert(!mem->reusable);
1132 if (mem->object->all_reusable) {
1133 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1134 }
1135
1136 if (object->purgable == VM_PURGABLE_VOLATILE) {
1137 if (VM_PAGE_WIRED(mem)) {
1138 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1139 } else {
1140 OSAddAtomic(1, &vm_page_purgeable_count);
1141 }
1142 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1143 mem->throttled) {
1144 /*
1145 * This page belongs to a purged VM object but hasn't
1146 * been purged (because it was "busy").
1147 * It's in the "throttled" queue and hence not
1148 * visible to vm_pageout_scan(). Move it to a pageable
1149 * queue, so that it can eventually be reclaimed, instead
1150 * of lingering in the "empty" object.
1151 */
1152 if (queues_lock_held == FALSE)
1153 vm_page_lockspin_queues();
1154 vm_page_deactivate(mem);
1155 if (queues_lock_held == FALSE)
1156 vm_page_unlock_queues();
1157 }
1158 }
1159
1160 /*
1161 * vm_page_replace:
1162 *
1163 * Exactly like vm_page_insert, except that we first
1164 * remove any existing page at the given offset in object.
1165 *
1166 * The object must be locked.
1167 */
1168 void
1169 vm_page_replace(
1170 register vm_page_t mem,
1171 register vm_object_t object,
1172 register vm_object_offset_t offset)
1173 {
1174 vm_page_bucket_t *bucket;
1175 vm_page_t found_m = VM_PAGE_NULL;
1176 lck_spin_t *bucket_lock;
1177 int hash_id;
1178
1179 #if 0
1180 /*
1181 * we don't hold the page queue lock
1182 * so this check isn't safe to make
1183 */
1184 VM_PAGE_CHECK(mem);
1185 #endif
1186 vm_object_lock_assert_exclusive(object);
1187 #if DEBUG
1188 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1189 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1190 "already in (obj=%p,off=0x%llx)",
1191 mem, object, offset, mem->object, mem->offset);
1192 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1193 #endif
1194 /*
1195 * Record the object/offset pair in this page
1196 */
1197
1198 mem->object = object;
1199 mem->offset = offset;
1200
1201 /*
1202 	 *	Insert it into the object/offset hash table,
1203 * replacing any page that might have been there.
1204 */
1205
1206 hash_id = vm_page_hash(object, offset);
1207 bucket = &vm_page_buckets[hash_id];
1208 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1209
1210 lck_spin_lock(bucket_lock);
1211
1212 if (bucket->pages) {
1213 vm_page_t *mp = &bucket->pages;
1214 vm_page_t m = *mp;
1215
1216 do {
1217 if (m->object == object && m->offset == offset) {
1218 /*
1219 * Remove old page from hash list
1220 */
1221 *mp = m->next;
1222
1223 found_m = m;
1224 break;
1225 }
1226 mp = &m->next;
1227 } while ((m = *mp));
1228
1229 mem->next = bucket->pages;
1230 } else {
1231 mem->next = VM_PAGE_NULL;
1232 }
1233 /*
1234 * insert new page at head of hash list
1235 */
1236 bucket->pages = mem;
1237
1238 lck_spin_unlock(bucket_lock);
1239
1240 if (found_m) {
1241 /*
1242 * there was already a page at the specified
1243 * offset for this object... remove it from
1244 * the object and free it back to the free list
1245 */
1246 vm_page_free_unlocked(found_m, FALSE);
1247 }
1248 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1249 }
1250
1251 /*
1252 * vm_page_remove: [ internal use only ]
1253 *
1254 * Removes the given mem entry from the object/offset-page
1255 * table and the object page list.
1256 *
1257 * The object must be locked.
1258 */
1259
1260 void
1261 vm_page_remove(
1262 vm_page_t mem,
1263 boolean_t remove_from_hash)
1264 {
1265 vm_page_bucket_t *bucket;
1266 vm_page_t this;
1267 lck_spin_t *bucket_lock;
1268 int hash_id;
1269
1270 XPR(XPR_VM_PAGE,
1271 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1272 mem->object, mem->offset,
1273 mem, 0,0);
1274
1275 vm_object_lock_assert_exclusive(mem->object);
1276 assert(mem->tabled);
1277 assert(!mem->cleaning);
1278 assert(!mem->laundry);
1279 #if 0
1280 /*
1281 * we don't hold the page queue lock
1282 * so this check isn't safe to make
1283 */
1284 VM_PAGE_CHECK(mem);
1285 #endif
1286 if (remove_from_hash == TRUE) {
1287 /*
1288 		 *	Remove from the object/offset hash table
1289 */
1290 hash_id = vm_page_hash(mem->object, mem->offset);
1291 bucket = &vm_page_buckets[hash_id];
1292 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1293
1294 lck_spin_lock(bucket_lock);
1295
1296 if ((this = bucket->pages) == mem) {
1297 /* optimize for common case */
1298
1299 bucket->pages = mem->next;
1300 } else {
1301 vm_page_t *prev;
1302
1303 for (prev = &this->next;
1304 (this = *prev) != mem;
1305 prev = &this->next)
1306 continue;
1307 *prev = this->next;
1308 }
1309 #if MACH_PAGE_HASH_STATS
1310 bucket->cur_count--;
1311 #endif /* MACH_PAGE_HASH_STATS */
1312
1313 lck_spin_unlock(bucket_lock);
1314 }
1315 /*
1316 * Now remove from the object's list of backed pages.
1317 */
1318
1319 VM_PAGE_REMOVE(mem);
1320
1321 /*
1322 * And show that the object has one fewer resident
1323 * page.
1324 */
1325
1326 assert(mem->object->resident_page_count > 0);
1327 mem->object->resident_page_count--;
1328
1329 if (mem->object->internal) {
1330 assert(vm_page_internal_count);
1331 OSAddAtomic(-1, &vm_page_internal_count);
1332 } else {
1333 assert(vm_page_external_count);
1334 OSAddAtomic(-1, &vm_page_external_count);
1335 }
1336 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1337 if (mem->object->resident_page_count == 0)
1338 vm_object_cache_remove(mem->object);
1339 }
1340
1341 if (VM_PAGE_WIRED(mem)) {
1342 assert(mem->object->wired_page_count > 0);
1343 mem->object->wired_page_count--;
1344 }
1345 assert(mem->object->resident_page_count >=
1346 mem->object->wired_page_count);
1347 if (mem->reusable) {
1348 assert(mem->object->reusable_page_count > 0);
1349 mem->object->reusable_page_count--;
1350 assert(mem->object->reusable_page_count <=
1351 mem->object->resident_page_count);
1352 mem->reusable = FALSE;
1353 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1354 vm_page_stats_reusable.reused_remove++;
1355 } else if (mem->object->all_reusable) {
1356 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1357 vm_page_stats_reusable.reused_remove++;
1358 }
1359
1360 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1361 if (VM_PAGE_WIRED(mem)) {
1362 assert(vm_page_purgeable_wired_count > 0);
1363 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1364 } else {
1365 assert(vm_page_purgeable_count > 0);
1366 OSAddAtomic(-1, &vm_page_purgeable_count);
1367 }
1368 }
1369 if (mem->object->set_cache_attr == TRUE)
1370 pmap_set_cache_attributes(mem->phys_page, 0);
1371
1372 mem->tabled = FALSE;
1373 mem->object = VM_OBJECT_NULL;
1374 mem->offset = (vm_object_offset_t) -1;
1375 }
1376
1377
1378 /*
1379 * vm_page_lookup:
1380 *
1381 * Returns the page associated with the object/offset
1382 * pair specified; if none is found, VM_PAGE_NULL is returned.
1383 *
1384 * The object must be locked. No side effects.
1385 */
1386
1387 unsigned long vm_page_lookup_hint = 0;
1388 unsigned long vm_page_lookup_hint_next = 0;
1389 unsigned long vm_page_lookup_hint_prev = 0;
1390 unsigned long vm_page_lookup_hint_miss = 0;
1391 unsigned long vm_page_lookup_bucket_NULL = 0;
1392 unsigned long vm_page_lookup_miss = 0;
1393
1394
1395 vm_page_t
1396 vm_page_lookup(
1397 vm_object_t object,
1398 vm_object_offset_t offset)
1399 {
1400 vm_page_t mem;
1401 vm_page_bucket_t *bucket;
1402 queue_entry_t qe;
1403 lck_spin_t *bucket_lock;
1404 int hash_id;
1405
1406 vm_object_lock_assert_held(object);
1407 mem = object->memq_hint;
1408
1409 if (mem != VM_PAGE_NULL) {
1410 assert(mem->object == object);
1411
1412 if (mem->offset == offset) {
1413 vm_page_lookup_hint++;
1414 return mem;
1415 }
1416 qe = queue_next(&mem->listq);
1417
1418 if (! queue_end(&object->memq, qe)) {
1419 vm_page_t next_page;
1420
1421 next_page = (vm_page_t) qe;
1422 assert(next_page->object == object);
1423
1424 if (next_page->offset == offset) {
1425 vm_page_lookup_hint_next++;
1426 object->memq_hint = next_page; /* new hint */
1427 return next_page;
1428 }
1429 }
1430 qe = queue_prev(&mem->listq);
1431
1432 if (! queue_end(&object->memq, qe)) {
1433 vm_page_t prev_page;
1434
1435 prev_page = (vm_page_t) qe;
1436 assert(prev_page->object == object);
1437
1438 if (prev_page->offset == offset) {
1439 vm_page_lookup_hint_prev++;
1440 object->memq_hint = prev_page; /* new hint */
1441 return prev_page;
1442 }
1443 }
1444 }
1445 /*
1446 * Search the hash table for this object/offset pair
1447 */
1448 hash_id = vm_page_hash(object, offset);
1449 bucket = &vm_page_buckets[hash_id];
1450
1451 /*
1452 * since we hold the object lock, we are guaranteed that no
1453 * new pages can be inserted into this object... this in turn
1454 	 * guarantees that the page we're looking for can't exist
1455 	 * if the bucket it hashes to is currently NULL even when looked
1456 	 * at outside the scope of the hash bucket lock... this is a
1457 	 * really cheap optimization to avoid taking the lock
1458 */
1459 if (bucket->pages == VM_PAGE_NULL) {
1460 vm_page_lookup_bucket_NULL++;
1461
1462 return (VM_PAGE_NULL);
1463 }
1464 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1465
1466 lck_spin_lock(bucket_lock);
1467
1468 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1469 #if 0
1470 /*
1471 * we don't hold the page queue lock
1472 * so this check isn't safe to make
1473 */
1474 VM_PAGE_CHECK(mem);
1475 #endif
1476 if ((mem->object == object) && (mem->offset == offset))
1477 break;
1478 }
1479 lck_spin_unlock(bucket_lock);
1480
1481 if (mem != VM_PAGE_NULL) {
1482 if (object->memq_hint != VM_PAGE_NULL) {
1483 vm_page_lookup_hint_miss++;
1484 }
1485 assert(mem->object == object);
1486 object->memq_hint = mem;
1487 } else
1488 vm_page_lookup_miss++;
1489
1490 return(mem);
1491 }
1492
1493
1494 /*
1495 * vm_page_rename:
1496 *
1497 * Move the given memory entry from its
1498 * current object to the specified target object/offset.
1499 *
1500 * The object must be locked.
1501 */
1502 void
1503 vm_page_rename(
1504 register vm_page_t mem,
1505 register vm_object_t new_object,
1506 vm_object_offset_t new_offset,
1507 boolean_t encrypted_ok)
1508 {
1509 boolean_t internal_to_external, external_to_internal;
1510
1511 assert(mem->object != new_object);
1512
1513 /*
1514 * ENCRYPTED SWAP:
1515 * The encryption key is based on the page's memory object
1516 * (aka "pager") and paging offset. Moving the page to
1517 * another VM object changes its "pager" and "paging_offset"
1518 * so it has to be decrypted first, or we would lose the key.
1519 *
1520 * One exception is VM object collapsing, where we transfer pages
1521 * from one backing object to its parent object. This operation also
1522 * transfers the paging information, so the <pager,paging_offset> info
1523 * should remain consistent. The caller (vm_object_do_collapse())
1524 * sets "encrypted_ok" in this case.
1525 */
1526 if (!encrypted_ok && mem->encrypted) {
1527 panic("vm_page_rename: page %p is encrypted\n", mem);
1528 }
1529
1530 XPR(XPR_VM_PAGE,
1531 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1532 new_object, new_offset,
1533 mem, 0,0);
1534
1535 /*
1536 * Changes to mem->object require the page lock because
1537 * the pageout daemon uses that lock to get the object.
1538 */
1539 vm_page_lockspin_queues();
1540
1541 internal_to_external = FALSE;
1542 external_to_internal = FALSE;
1543
1544 if (mem->local) {
1545 /*
1546 * it's much easier to get the vm_page_pageable_xxx accounting correct
1547 * if we first move the page to the active queue... it's going to end
1548 		 * up there anyway, and we don't call vm_page_rename frequently enough
1549 * for this to matter.
1550 */
1551 VM_PAGE_QUEUES_REMOVE(mem);
1552 vm_page_activate(mem);
1553 }
1554 if (mem->active || mem->inactive || mem->speculative) {
1555 if (mem->object->internal && !new_object->internal) {
1556 internal_to_external = TRUE;
1557 }
1558 if (!mem->object->internal && new_object->internal) {
1559 external_to_internal = TRUE;
1560 }
1561 }
1562
1563 vm_page_remove(mem, TRUE);
1564 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1565
1566 if (internal_to_external) {
1567 vm_page_pageable_internal_count--;
1568 vm_page_pageable_external_count++;
1569 } else if (external_to_internal) {
1570 vm_page_pageable_external_count--;
1571 vm_page_pageable_internal_count++;
1572 }
1573
1574 vm_page_unlock_queues();
1575 }
1576
1577 /*
1578 * vm_page_init:
1579 *
1580 * Initialize the fields in a new page.
1581 * This takes a structure with random values and initializes it
1582 * so that it can be given to vm_page_release or vm_page_insert.
1583 */
1584 void
1585 vm_page_init(
1586 vm_page_t mem,
1587 ppnum_t phys_page,
1588 boolean_t lopage)
1589 {
1590 assert(phys_page);
1591
1592 #if DEBUG
1593 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1594 if (!(pmap_valid_page(phys_page))) {
1595 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1596 }
1597 }
1598 #endif
1599 *mem = vm_page_template;
1600 mem->phys_page = phys_page;
1601 #if 0
1602 /*
1603 * we're leaving this turned off for now... currently pages
1604 * come off the free list and are either immediately dirtied/referenced
1605 * due to zero-fill or COW faults, or are used to read or write files...
1606 * in the file I/O case, the UPL mechanism takes care of clearing
1607 * the state of the HW ref/mod bits in a somewhat fragile way.
1608 * Since we may change the way this works in the future (to toughen it up),
1609 * I'm leaving this as a reminder of where these bits could get cleared
1610 */
1611
1612 /*
1613 * make sure both the h/w referenced and modified bits are
1614 * clear at this point... we are especially dependent on
1615 * not finding a 'stale' h/w modified in a number of spots
1616 * once this page goes back into use
1617 */
1618 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1619 #endif
1620 mem->lopage = lopage;
1621 }
1622
1623 /*
1624 * vm_page_grab_fictitious:
1625 *
1626 * Remove a fictitious page from the free list.
1627 * Returns VM_PAGE_NULL if there are no free pages.
1628 */
1629 int c_vm_page_grab_fictitious = 0;
1630 int c_vm_page_grab_fictitious_failed = 0;
1631 int c_vm_page_release_fictitious = 0;
1632 int c_vm_page_more_fictitious = 0;
1633
1634 vm_page_t
1635 vm_page_grab_fictitious_common(
1636 ppnum_t phys_addr)
1637 {
1638 vm_page_t m;
1639
1640 if ((m = (vm_page_t)zget(vm_page_zone))) {
1641
1642 vm_page_init(m, phys_addr, FALSE);
1643 m->fictitious = TRUE;
1644
1645 c_vm_page_grab_fictitious++;
1646 } else
1647 c_vm_page_grab_fictitious_failed++;
1648
1649 return m;
1650 }
1651
1652 vm_page_t
1653 vm_page_grab_fictitious(void)
1654 {
1655 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1656 }
1657
1658 vm_page_t
1659 vm_page_grab_guard(void)
1660 {
1661 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1662 }
1663
1664
1665 /*
1666 * vm_page_release_fictitious:
1667 *
1668 * Release a fictitious page to the zone pool
1669 */
1670 void
1671 vm_page_release_fictitious(
1672 vm_page_t m)
1673 {
1674 assert(!m->free);
1675 assert(m->fictitious);
1676 assert(m->phys_page == vm_page_fictitious_addr ||
1677 m->phys_page == vm_page_guard_addr);
1678
1679 c_vm_page_release_fictitious++;
1680
1681 zfree(vm_page_zone, m);
1682 }
1683
1684 /*
1685 * vm_page_more_fictitious:
1686 *
1687 * Add more fictitious pages to the zone.
1688  *	Allowed to block. This routine is intimately tied up
1689 * with the zones code, for several reasons:
1690 * 1. we need to carve some page structures out of physical
1691 * memory before zones work, so they _cannot_ come from
1692 * the zone_map.
1693 * 2. the zone needs to be collectable in order to prevent
1694 * growth without bound. These structures are used by
1695 * the device pager (by the hundreds and thousands), as
1696 * private pages for pageout, and as blocking pages for
1697 * pagein. Temporary bursts in demand should not result in
1698 * permanent allocation of a resource.
1699 * 3. To smooth allocation humps, we allocate single pages
1700 * with kernel_memory_allocate(), and cram them into the
1701 * zone.
1702 */
1703
1704 void vm_page_more_fictitious(void)
1705 {
1706 vm_offset_t addr;
1707 kern_return_t retval;
1708
1709 c_vm_page_more_fictitious++;
1710
1711 /*
1712 * Allocate a single page from the zone_map. Do not wait if no physical
1713 * pages are immediately available, and do not zero the space. We need
1714 * our own blocking lock here to prevent having multiple,
1715 * simultaneous requests from piling up on the zone_map lock. Exactly
1716 * one (of our) threads should be potentially waiting on the map lock.
1717 * If winner is not vm-privileged, then the page allocation will fail,
1718 * and it will temporarily block here in the vm_page_wait().
1719 */
1720 lck_mtx_lock(&vm_page_alloc_lock);
1721 /*
1722 * If another thread allocated space, just bail out now.
1723 */
1724 if (zone_free_count(vm_page_zone) > 5) {
1725 /*
1726 * The number "5" is a small number that is larger than the
1727 * number of fictitious pages that any single caller will
1728 * attempt to allocate. Otherwise, a thread will attempt to
1729 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1730 * release all of the resources and locks already acquired,
1731 * and then call this routine. This routine finds the pages
1732 * that the caller released, so fails to allocate new space.
1733 * The process repeats infinitely. The largest known number
1734 * of fictitious pages required in this manner is 2. 5 is
1735 * simply a somewhat larger number.
1736 */
1737 lck_mtx_unlock(&vm_page_alloc_lock);
1738 return;
1739 }
1740
1741 retval = kernel_memory_allocate(zone_map,
1742 &addr, PAGE_SIZE, VM_PROT_ALL,
1743 KMA_KOBJECT|KMA_NOPAGEWAIT);
1744 if (retval != KERN_SUCCESS) {
1745 /*
1746 * No page was available. Drop the
1747 * lock to give another thread a chance at it, and
1748 * wait for the pageout daemon to make progress.
1749 */
1750 lck_mtx_unlock(&vm_page_alloc_lock);
1751 vm_page_wait(THREAD_UNINT);
1752 return;
1753 }
1754
1755 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1756 OSAddAtomic64(1, &(vm_page_zone->page_count));
1757
1758 zcram(vm_page_zone, addr, PAGE_SIZE);
1759
1760 lck_mtx_unlock(&vm_page_alloc_lock);
1761 }
1762
1763
1764 /*
1765 * vm_pool_low():
1766 *
1767 * Return true if it is not likely that a non-vm_privileged thread
1768 * can get memory without blocking. Advisory only, since the
1769 * situation may change under us.
1770 */
1771 int
1772 vm_pool_low(void)
1773 {
1774 /* No locking, at worst we will fib. */
1775 return( vm_page_free_count <= vm_page_free_reserved );
1776 }
1777
1778
1779
1780 /*
1781 * this is an interface to support bring-up of drivers
1782 * on platforms with physical memory > 4G...
1783 */
1784 int vm_himemory_mode = 0;
1785
1786
1787 /*
1788 * this interface exists to support hardware controllers
1789 * incapable of generating DMAs with more than 32 bits
1790 * of address on platforms with physical memory > 4G...
1791 */
1792 unsigned int vm_lopages_allocated_q = 0;
1793 unsigned int vm_lopages_allocated_cpm_success = 0;
1794 unsigned int vm_lopages_allocated_cpm_failed = 0;
1795 queue_head_t vm_lopage_queue_free;
1796
1797 vm_page_t
1798 vm_page_grablo(void)
1799 {
1800 vm_page_t mem;
1801
1802 if (vm_lopage_needed == FALSE)
1803 return (vm_page_grab());
1804
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1806
1807 if ( !queue_empty(&vm_lopage_queue_free)) {
1808 queue_remove_first(&vm_lopage_queue_free,
1809 mem,
1810 vm_page_t,
1811 pageq);
1812 assert(vm_lopage_free_count);
1813
1814 vm_lopage_free_count--;
1815 vm_lopages_allocated_q++;
1816
1817 if (vm_lopage_free_count < vm_lopage_lowater)
1818 vm_lopage_refill = TRUE;
1819
1820 lck_mtx_unlock(&vm_page_queue_free_lock);
1821 } else {
1822 lck_mtx_unlock(&vm_page_queue_free_lock);
1823
1824 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1825
1826 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1827 vm_lopages_allocated_cpm_failed++;
1828 lck_mtx_unlock(&vm_page_queue_free_lock);
1829
1830 return (VM_PAGE_NULL);
1831 }
1832 mem->busy = TRUE;
1833
1834 vm_page_lockspin_queues();
1835
1836 mem->gobbled = FALSE;
1837 vm_page_gobble_count--;
1838 vm_page_wire_count--;
1839
1840 vm_lopages_allocated_cpm_success++;
1841 vm_page_unlock_queues();
1842 }
1843 assert(mem->busy);
1844 assert(!mem->free);
1845 assert(!mem->pmapped);
1846 assert(!mem->wpmapped);
1847 assert(!pmap_is_noencrypt(mem->phys_page));
1848
1849 mem->pageq.next = NULL;
1850 mem->pageq.prev = NULL;
1851
1852 return (mem);
1853 }
1854
1855
1856 /*
1857 * vm_page_grab:
1858 *
1859 * first try to grab a page from the per-cpu free list...
1860 * this must be done while pre-emption is disabled... if
1861 * a page is available, we're done...
1862 * if no page is available, grab the vm_page_queue_free_lock
1863 * and see if current number of free pages would allow us
1864 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1865 * if there are pages available, disable preemption and
1866 * recheck the state of the per-cpu free list... we could
1867 * have been preempted and moved to a different cpu, or
1868 * some other thread could have re-filled it... if still
1869 * empty, figure out how many pages we can steal from the
1870 * global free queue and move to the per-cpu queue...
1871 * return 1 of these pages when done... only wakeup the
1872 * pageout_scan thread if we moved pages from the global
1873 * list... no need for the wakeup if we've satisfied the
1874 * request from the per-cpu queue.
1875 */
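/*
 * Typical caller pattern (an illustrative sketch, not part of the original
 * file): a non-privileged caller must be prepared for VM_PAGE_NULL when the
 * free pool is at or below vm_page_free_reserved, and normally retries after
 * waiting for the pageout daemon to replenish the pool:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *		VM_PAGE_WAIT();
 */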
1876
1877 #define COLOR_GROUPS_TO_STEAL 4
1878
1879
1880 vm_page_t
1881 vm_page_grab( void )
1882 {
1883 vm_page_t mem;
1884
1885
1886 disable_preemption();
1887
1888 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1889 return_page_from_cpu_list:
1890 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1891 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1892 mem->pageq.next = NULL;
1893
1894 enable_preemption();
1895
1896 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1897 assert(mem->tabled == FALSE);
1898 assert(mem->object == VM_OBJECT_NULL);
1899 assert(!mem->laundry);
1900 assert(!mem->free);
1901 assert(pmap_verify_free(mem->phys_page));
1902 assert(mem->busy);
1903 assert(!mem->encrypted);
1904 assert(!mem->pmapped);
1905 assert(!mem->wpmapped);
1906 assert(!mem->active);
1907 assert(!mem->inactive);
1908 assert(!mem->throttled);
1909 assert(!mem->speculative);
1910 assert(!pmap_is_noencrypt(mem->phys_page));
1911
1912 return mem;
1913 }
1914 enable_preemption();
1915
1916
1917 /*
1918 * Optionally produce warnings if the wire or gobble
1919 * counts exceed some threshold.
1920 */
1921 if (vm_page_wire_count_warning > 0
1922 && vm_page_wire_count >= vm_page_wire_count_warning) {
1923 printf("mk: vm_page_grab(): high wired page count of %d\n",
1924 vm_page_wire_count);
1925 assert(vm_page_wire_count < vm_page_wire_count_warning);
1926 }
1927 if (vm_page_gobble_count_warning > 0
1928 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1929 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1930 vm_page_gobble_count);
1931 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1932 }
1933
1934 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1935
1936 /*
1937 * Only let privileged threads (involved in pageout)
1938 * dip into the reserved pool.
1939 */
1940 if ((vm_page_free_count < vm_page_free_reserved) &&
1941 !(current_thread()->options & TH_OPT_VMPRIV)) {
1942 lck_mtx_unlock(&vm_page_queue_free_lock);
1943 mem = VM_PAGE_NULL;
1944 }
1945 else {
1946 vm_page_t head;
1947 vm_page_t tail;
1948 unsigned int pages_to_steal;
1949 unsigned int color;
1950
1951 while ( vm_page_free_count == 0 ) {
1952
1953 lck_mtx_unlock(&vm_page_queue_free_lock);
1954 /*
1955 * must be a privileged thread to be
1956 * in this state since a non-privileged
1957 * thread would have bailed if we were
1958 * under the vm_page_free_reserved mark
1959 */
1960 VM_PAGE_WAIT();
1961 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1962 }
1963
1964 disable_preemption();
1965
1966 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1967 lck_mtx_unlock(&vm_page_queue_free_lock);
1968
1969 /*
1970 * we got preempted and moved to another processor
1971 * or we got preempted and someone else ran and filled the cache
1972 */
1973 goto return_page_from_cpu_list;
1974 }
1975 if (vm_page_free_count <= vm_page_free_reserved)
1976 pages_to_steal = 1;
1977 else {
1978 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1979
1980 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1981 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1982 }
1983 color = PROCESSOR_DATA(current_processor(), start_color);
1984 head = tail = NULL;
1985
1986 while (pages_to_steal--) {
1987 if (--vm_page_free_count < vm_page_free_count_minimum)
1988 vm_page_free_count_minimum = vm_page_free_count;
1989
1990 while (queue_empty(&vm_page_queue_free[color]))
1991 color = (color + 1) & vm_color_mask;
1992
1993 queue_remove_first(&vm_page_queue_free[color],
1994 mem,
1995 vm_page_t,
1996 pageq);
1997 mem->pageq.next = NULL;
1998 mem->pageq.prev = NULL;
1999
2000 assert(!mem->active);
2001 assert(!mem->inactive);
2002 assert(!mem->throttled);
2003 assert(!mem->speculative);
2004
2005 color = (color + 1) & vm_color_mask;
2006
2007 if (head == NULL)
2008 head = mem;
2009 else
2010 tail->pageq.next = (queue_t)mem;
2011 tail = mem;
2012
2013 mem->pageq.prev = NULL;
2014 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2015 assert(mem->tabled == FALSE);
2016 assert(mem->object == VM_OBJECT_NULL);
2017 assert(!mem->laundry);
2018 assert(mem->free);
2019 mem->free = FALSE;
2020
2021 assert(pmap_verify_free(mem->phys_page));
2022 assert(mem->busy);
2023 assert(!mem->free);
2024 assert(!mem->encrypted);
2025 assert(!mem->pmapped);
2026 assert(!mem->wpmapped);
2027 assert(!pmap_is_noencrypt(mem->phys_page));
2028 }
2029 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2030 PROCESSOR_DATA(current_processor(), start_color) = color;
2031
2032 /*
2033 * satisfy this request
2034 */
2035 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2036 mem = head;
2037 mem->pageq.next = NULL;
2038
2039 lck_mtx_unlock(&vm_page_queue_free_lock);
2040
2041 enable_preemption();
2042 }
2043 /*
2044 * Decide if we should poke the pageout daemon.
2045 * We do this if the free count is less than the low
2046 * water mark, or if the free count is less than the high
2047 * water mark (but above the low water mark) and the inactive
2048 * count is less than its target.
2049 *
2050 * We don't have the counts locked ... if they change a little,
2051 * it doesn't really matter.
2052 */
2053 if ((vm_page_free_count < vm_page_free_min) ||
2054 ((vm_page_free_count < vm_page_free_target) &&
2055 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2056 thread_wakeup((event_t) &vm_page_free_wanted);
2057
2058 VM_CHECK_MEMORYSTATUS;
2059
2060 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2061
2062 return mem;
2063 }
2064
2065 /*
2066 * vm_page_release:
2067 *
2068 * Return a page to the free list.
2069 */
2070
2071 void
2072 vm_page_release(
2073 register vm_page_t mem)
2074 {
2075 unsigned int color;
2076 int need_wakeup = 0;
2077 int need_priv_wakeup = 0;
2078
2079
2080 assert(!mem->private && !mem->fictitious);
2081 if (vm_page_free_verify) {
2082 assert(pmap_verify_free(mem->phys_page));
2083 }
2084 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2085
2086 pmap_clear_noencrypt(mem->phys_page);
2087
2088 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2089 #if DEBUG
2090 if (mem->free)
2091 panic("vm_page_release");
2092 #endif
2093
2094 assert(mem->busy);
2095 assert(!mem->laundry);
2096 assert(mem->object == VM_OBJECT_NULL);
2097 assert(mem->pageq.next == NULL &&
2098 mem->pageq.prev == NULL);
2099 assert(mem->listq.next == NULL &&
2100 mem->listq.prev == NULL);
2101
2102 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2103 vm_lopage_free_count < vm_lopage_free_limit &&
2104 mem->phys_page < max_valid_low_ppnum) {
2105 /*
2106 * this exists to support hardware controllers
2107 * incapable of generating DMAs with more than 32 bits
2108 * of address on platforms with physical memory > 4G...
2109 */
2110 queue_enter_first(&vm_lopage_queue_free,
2111 mem,
2112 vm_page_t,
2113 pageq);
2114 vm_lopage_free_count++;
2115
2116 if (vm_lopage_free_count >= vm_lopage_free_limit)
2117 vm_lopage_refill = FALSE;
2118
2119 mem->lopage = TRUE;
2120 } else {
2121 mem->lopage = FALSE;
2122 mem->free = TRUE;
2123
2124 color = mem->phys_page & vm_color_mask;
2125 queue_enter_first(&vm_page_queue_free[color],
2126 mem,
2127 vm_page_t,
2128 pageq);
2129 vm_page_free_count++;
2130 /*
2131 * Check if we should wake up someone waiting for page.
2132 * But don't bother waking them unless they can allocate.
2133 *
2134 * We wakeup only one thread, to prevent starvation.
2135 * Because the scheduling system handles wait queues FIFO,
2136 * if we wakeup all waiting threads, one greedy thread
2137 * can starve multiple niceguy threads. When the threads
2138 * all wake up, the greedy thread runs first, grabs the page,
2139 * and waits for another page. It will be the first to run
2140 * when the next page is freed.
2141 *
2142 * However, there is a slight danger here.
2143 * The thread we wake might not use the free page.
2144 * Then the other threads could wait indefinitely
2145 * while the page goes unused. To forestall this,
2146 * the pageout daemon will keep making free pages
2147 * as long as vm_page_free_wanted is non-zero.
2148 */
2149
2150 assert(vm_page_free_count > 0);
2151 if (vm_page_free_wanted_privileged > 0) {
2152 vm_page_free_wanted_privileged--;
2153 need_priv_wakeup = 1;
2154 } else if (vm_page_free_wanted > 0 &&
2155 vm_page_free_count > vm_page_free_reserved) {
2156 vm_page_free_wanted--;
2157 need_wakeup = 1;
2158 }
2159 }
2160 lck_mtx_unlock(&vm_page_queue_free_lock);
2161
2162 if (need_priv_wakeup)
2163 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2164 else if (need_wakeup)
2165 thread_wakeup_one((event_t) &vm_page_free_count);
2166
2167 VM_CHECK_MEMORYSTATUS;
2168 }
2169
2170 /*
2171 * vm_page_wait:
2172 *
2173 * Wait for a page to become available.
2174 * If there are plenty of free pages, then we don't sleep.
2175 *
2176 * Returns:
2177 * TRUE: There may be another page, try again
2178 * FALSE: We were interrupted out of our wait, don't try again
2179 */
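/*
 * Illustrative use of the return value (a sketch; the surrounding error
 * handling is an assumption, not taken from this file). Interruptible
 * callers must honor a FALSE return and give up rather than spin:
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
 *			return KERN_ABORTED;	// we were interrupted; bail out
 *	}
 *
 * Callers that wait with THREAD_UNINT (as in the zone-fill path earlier in
 * this file) can simply loop until a page becomes available.
 */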
2180
2181 boolean_t
2182 vm_page_wait(
2183 int interruptible )
2184 {
2185 /*
2186 * We can't use vm_page_free_reserved to make this
2187 * determination. Consider: some thread might
2188 * need to allocate two pages. The first allocation
2189 * succeeds, the second fails. After the first page is freed,
2190 * a call to vm_page_wait must really block.
2191 */
2192 kern_return_t wait_result;
2193 int need_wakeup = 0;
2194 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2195
2196 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2197
2198 if (is_privileged && vm_page_free_count) {
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200 return TRUE;
2201 }
2202 if (vm_page_free_count < vm_page_free_target) {
2203
2204 if (is_privileged) {
2205 if (vm_page_free_wanted_privileged++ == 0)
2206 need_wakeup = 1;
2207 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2208 } else {
2209 if (vm_page_free_wanted++ == 0)
2210 need_wakeup = 1;
2211 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2212 }
2213 lck_mtx_unlock(&vm_page_queue_free_lock);
2214 counter(c_vm_page_wait_block++);
2215
2216 if (need_wakeup)
2217 thread_wakeup((event_t)&vm_page_free_wanted);
2218
2219 if (wait_result == THREAD_WAITING) {
2220 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2221 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2222 wait_result = thread_block(THREAD_CONTINUE_NULL);
2223 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2224 }
2225
2226 return(wait_result == THREAD_AWAKENED);
2227 } else {
2228 lck_mtx_unlock(&vm_page_queue_free_lock);
2229 return TRUE;
2230 }
2231 }
2232
2233 /*
2234 * vm_page_alloc:
2235 *
2236 * Allocate and return a memory cell associated
2237 * with this VM object/offset pair.
2238 *
2239 * Object must be locked.
2240 */
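/*
 * Sketch of a caller (illustrative only; the retry policy shown is an
 * assumption, not taken from this file):
 *
 *	vm_object_lock(object);			// exclusive, per the requirement above
 *	m = vm_page_alloc(object, offset);
 *	if (m == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();			// let the pageout daemon make progress
 *		// ... re-take the lock and retry, or fail the operation ...
 *	}
 */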
2241
2242 vm_page_t
2243 vm_page_alloc(
2244 vm_object_t object,
2245 vm_object_offset_t offset)
2246 {
2247 register vm_page_t mem;
2248
2249 vm_object_lock_assert_exclusive(object);
2250 mem = vm_page_grab();
2251 if (mem == VM_PAGE_NULL)
2252 return VM_PAGE_NULL;
2253
2254 vm_page_insert(mem, object, offset);
2255
2256 return(mem);
2257 }
2258
2259 vm_page_t
2260 vm_page_alloclo(
2261 vm_object_t object,
2262 vm_object_offset_t offset)
2263 {
2264 register vm_page_t mem;
2265
2266 vm_object_lock_assert_exclusive(object);
2267 mem = vm_page_grablo();
2268 if (mem == VM_PAGE_NULL)
2269 return VM_PAGE_NULL;
2270
2271 vm_page_insert(mem, object, offset);
2272
2273 return(mem);
2274 }
2275
2276
2277 /*
2278 * vm_page_alloc_guard:
2279 *
2280 * Allocate a fictitious page which will be used
2281 * as a guard page. The page will be inserted into
2282 * the object and returned to the caller.
2283 */
2284
2285 vm_page_t
2286 vm_page_alloc_guard(
2287 vm_object_t object,
2288 vm_object_offset_t offset)
2289 {
2290 register vm_page_t mem;
2291
2292 vm_object_lock_assert_exclusive(object);
2293 mem = vm_page_grab_guard();
2294 if (mem == VM_PAGE_NULL)
2295 return VM_PAGE_NULL;
2296
2297 vm_page_insert(mem, object, offset);
2298
2299 return(mem);
2300 }
2301
2302
2303 counter(unsigned int c_laundry_pages_freed = 0;)
2304
2305 /*
2306 * vm_page_free_prepare:
2307 *
2308 * Removes page from any queue it may be on
2309 * and disassociates it from its VM object.
2310 *
2311 * Object and page queues must be locked prior to entry.
2312 */
2313 static void
2314 vm_page_free_prepare(
2315 vm_page_t mem)
2316 {
2317 vm_page_free_prepare_queues(mem);
2318 vm_page_free_prepare_object(mem, TRUE);
2319 }
2320
2321
2322 void
2323 vm_page_free_prepare_queues(
2324 vm_page_t mem)
2325 {
2326 VM_PAGE_CHECK(mem);
2327 assert(!mem->free);
2328 assert(!mem->cleaning);
2329 #if DEBUG
2330 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2331 if (mem->free)
2332 panic("vm_page_free: freeing page on free list\n");
2333 #endif
2334 if (mem->object) {
2335 vm_object_lock_assert_exclusive(mem->object);
2336 }
2337 if (mem->laundry) {
2338 /*
2339 * We may have to free a page while it's being laundered
2340 * if we lost its pager (due to a forced unmount, for example).
2341 * We need to call vm_pageout_steal_laundry() before removing
2342 * the page from its VM object, so that we can remove it
2343 * from its pageout queue and adjust the laundry accounting
2344 */
2345 vm_pageout_steal_laundry(mem, TRUE);
2346 counter(++c_laundry_pages_freed);
2347 }
2348
2349 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2350
2351 if (VM_PAGE_WIRED(mem)) {
2352 if (mem->object) {
2353 assert(mem->object->wired_page_count > 0);
2354 mem->object->wired_page_count--;
2355 assert(mem->object->resident_page_count >=
2356 mem->object->wired_page_count);
2357
2358 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2359 OSAddAtomic(+1, &vm_page_purgeable_count);
2360 assert(vm_page_purgeable_wired_count > 0);
2361 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2362 }
2363 }
2364 if (!mem->private && !mem->fictitious)
2365 vm_page_wire_count--;
2366 mem->wire_count = 0;
2367 assert(!mem->gobbled);
2368 } else if (mem->gobbled) {
2369 if (!mem->private && !mem->fictitious)
2370 vm_page_wire_count--;
2371 vm_page_gobble_count--;
2372 }
2373 }
2374
2375
2376 void
2377 vm_page_free_prepare_object(
2378 vm_page_t mem,
2379 boolean_t remove_from_hash)
2380 {
2381 if (mem->tabled)
2382 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2383
2384 PAGE_WAKEUP(mem); /* clears wanted */
2385
2386 if (mem->private) {
2387 mem->private = FALSE;
2388 mem->fictitious = TRUE;
2389 mem->phys_page = vm_page_fictitious_addr;
2390 }
2391 if ( !mem->fictitious) {
2392 vm_page_init(mem, mem->phys_page, mem->lopage);
2393 }
2394 }
2395
2396
2397 /*
2398 * vm_page_free:
2399 *
2400 * Returns the given page to the free list,
2401 * disassociating it from any VM object.
2402 *
2403 * Object and page queues must be locked prior to entry.
2404 */
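/*
 * Illustrative pairing (a sketch, not from a caller in this file): the caller
 * holds both locks around vm_page_free(), whereas vm_page_free_unlocked()
 * below takes the page queues lock itself:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_free(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */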
2405 void
2406 vm_page_free(
2407 vm_page_t mem)
2408 {
2409 vm_page_free_prepare(mem);
2410
2411 if (mem->fictitious) {
2412 vm_page_release_fictitious(mem);
2413 } else {
2414 vm_page_release(mem);
2415 }
2416 }
2417
2418
2419 void
2420 vm_page_free_unlocked(
2421 vm_page_t mem,
2422 boolean_t remove_from_hash)
2423 {
2424 vm_page_lockspin_queues();
2425 vm_page_free_prepare_queues(mem);
2426 vm_page_unlock_queues();
2427
2428 vm_page_free_prepare_object(mem, remove_from_hash);
2429
2430 if (mem->fictitious) {
2431 vm_page_release_fictitious(mem);
2432 } else {
2433 vm_page_release(mem);
2434 }
2435 }
2436
2437
2438 /*
2439 * Free a list of pages. The list can be up to several hundred pages,
2440 * as blocked up by vm_pageout_scan().
2441 * The big win is not having to take the free list lock once
2442 * per page.
2443 */
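/*
 * Illustrative sketch of how a caller batches pages for this routine (the
 * variable names are assumptions; the chaining through pageq.next mirrors
 * what the loop below does when it builds local_freeq). The pages must
 * already be off the paging queues and unwired, per the asserts below:
 *
 *	vm_page_t freeq = VM_PAGE_NULL;
 *
 *	// for each page 'mem' to be freed, push it onto the singly linked list
 *	mem->pageq.next = (queue_entry_t) freeq;
 *	freeq = mem;
 *
 *	// one call releases the whole batch, taking the free-list lock once
 *	// per 64-page chunk instead of once per page
 *	if (freeq != VM_PAGE_NULL)
 *		vm_page_free_list(freeq, TRUE);	// TRUE: also disassociate from the object
 */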
2444 void
2445 vm_page_free_list(
2446 vm_page_t freeq,
2447 boolean_t prepare_object)
2448 {
2449 vm_page_t mem;
2450 vm_page_t nxt;
2451 vm_page_t local_freeq;
2452 int pg_count;
2453
2454 while (freeq) {
2455
2456 pg_count = 0;
2457 local_freeq = VM_PAGE_NULL;
2458 mem = freeq;
2459
2460 /*
2461 * break up the processing into smaller chunks so
2462 * that we can 'pipeline' the pages onto the
2463 * free list w/o introducing too much
2464 * contention on the global free queue lock
2465 */
2466 while (mem && pg_count < 64) {
2467
2468 assert(!mem->inactive);
2469 assert(!mem->active);
2470 assert(!mem->throttled);
2471 assert(!mem->free);
2472 assert(!mem->speculative);
2473 assert(!VM_PAGE_WIRED(mem));
2474 assert(mem->pageq.prev == NULL);
2475
2476 nxt = (vm_page_t)(mem->pageq.next);
2477
2478 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2479 assert(pmap_verify_free(mem->phys_page));
2480 }
2481 if (prepare_object == TRUE)
2482 vm_page_free_prepare_object(mem, TRUE);
2483
2484 if (!mem->fictitious) {
2485 assert(mem->busy);
2486
2487 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2488 vm_lopage_free_count < vm_lopage_free_limit &&
2489 mem->phys_page < max_valid_low_ppnum) {
2490 mem->pageq.next = NULL;
2491 vm_page_release(mem);
2492 } else {
2493 /*
2494 * IMPORTANT: we can't set the page "free" here
2495 * because that would make the page eligible for
2496 * a physically-contiguous allocation (see
2497 * vm_page_find_contiguous()) right away (we don't
2498 * hold the vm_page_queue_free lock). That would
2499 * cause trouble because the page is not actually
2500 * in the free queue yet...
2501 */
2502 mem->pageq.next = (queue_entry_t)local_freeq;
2503 local_freeq = mem;
2504 pg_count++;
2505
2506 pmap_clear_noencrypt(mem->phys_page);
2507 }
2508 } else {
2509 assert(mem->phys_page == vm_page_fictitious_addr ||
2510 mem->phys_page == vm_page_guard_addr);
2511 vm_page_release_fictitious(mem);
2512 }
2513 mem = nxt;
2514 }
2515 freeq = mem;
2516
2517 if ( (mem = local_freeq) ) {
2518 unsigned int avail_free_count;
2519 unsigned int need_wakeup = 0;
2520 unsigned int need_priv_wakeup = 0;
2521
2522 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2523
2524 while (mem) {
2525 int color;
2526
2527 nxt = (vm_page_t)(mem->pageq.next);
2528
2529 assert(!mem->free);
2530 assert(mem->busy);
2531 mem->free = TRUE;
2532
2533 color = mem->phys_page & vm_color_mask;
2534 queue_enter_first(&vm_page_queue_free[color],
2535 mem,
2536 vm_page_t,
2537 pageq);
2538 mem = nxt;
2539 }
2540 vm_page_free_count += pg_count;
2541 avail_free_count = vm_page_free_count;
2542
2543 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2544
2545 if (avail_free_count < vm_page_free_wanted_privileged) {
2546 need_priv_wakeup = avail_free_count;
2547 vm_page_free_wanted_privileged -= avail_free_count;
2548 avail_free_count = 0;
2549 } else {
2550 need_priv_wakeup = vm_page_free_wanted_privileged;
2551 avail_free_count -= vm_page_free_wanted_privileged;
2552 vm_page_free_wanted_privileged = 0;
2553 }
2554 }
2555 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2556 unsigned int available_pages;
2557
2558 available_pages = avail_free_count - vm_page_free_reserved;
2559
2560 if (available_pages >= vm_page_free_wanted) {
2561 need_wakeup = vm_page_free_wanted;
2562 vm_page_free_wanted = 0;
2563 } else {
2564 need_wakeup = available_pages;
2565 vm_page_free_wanted -= available_pages;
2566 }
2567 }
2568 lck_mtx_unlock(&vm_page_queue_free_lock);
2569
2570 if (need_priv_wakeup != 0) {
2571 /*
2572 * There shouldn't be that many VM-privileged threads,
2573 * so let's wake them all up, even if we don't quite
2574 * have enough pages to satisfy them all.
2575 */
2576 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2577 }
2578 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2579 /*
2580 * We don't expect to have any more waiters
2581 * after this, so let's wake them all up at
2582 * once.
2583 */
2584 thread_wakeup((event_t) &vm_page_free_count);
2585 } else for (; need_wakeup != 0; need_wakeup--) {
2586 /*
2587 * Wake up one waiter per page we just released.
2588 */
2589 thread_wakeup_one((event_t) &vm_page_free_count);
2590 }
2591
2592 VM_CHECK_MEMORYSTATUS;
2593 }
2594 }
2595 }
2596
2597
2598 /*
2599 * vm_page_wire:
2600 *
2601 * Mark this page as wired down by yet
2602 * another map, removing it from paging queues
2603 * as necessary.
2604 *
2605 * The page's object and the page queues must be locked.
2606 */
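/*
 * Illustrative locking discipline for wiring and later unwiring a page (a
 * sketch, not taken verbatim from a caller in this file):
 *
 *	vm_object_lock(object);			// the page's object, exclusive
 *	vm_page_lockspin_queues();
 *	vm_page_wire(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 *
 *	// ... later, with the same locks held ...
 *	vm_page_unwire(mem, TRUE);		// TRUE: re-queue the page when wire_count drops to 0
 */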
2607 void
2608 vm_page_wire(
2609 register vm_page_t mem)
2610 {
2611
2612 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2613
2614 VM_PAGE_CHECK(mem);
2615 if (mem->object) {
2616 vm_object_lock_assert_exclusive(mem->object);
2617 } else {
2618 /*
2619 * In theory, the page should be in an object before it
2620 * gets wired, since we need to hold the object lock
2621 * to update some fields in the page structure.
2622 * However, some code (i386 pmap, for example) might want
2623 * to wire a page before it gets inserted into an object.
2624 * That's somewhat OK, as long as nobody else can get to
2625 * that page and update it at the same time.
2626 */
2627 }
2628 #if DEBUG
2629 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2630 #endif
2631 if ( !VM_PAGE_WIRED(mem)) {
2632
2633 if (mem->pageout_queue) {
2634 mem->pageout = FALSE;
2635 vm_pageout_throttle_up(mem);
2636 }
2637 VM_PAGE_QUEUES_REMOVE(mem);
2638
2639 if (mem->object) {
2640 mem->object->wired_page_count++;
2641 assert(mem->object->resident_page_count >=
2642 mem->object->wired_page_count);
2643 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2644 assert(vm_page_purgeable_count > 0);
2645 OSAddAtomic(-1, &vm_page_purgeable_count);
2646 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2647 }
2648 if (mem->object->all_reusable) {
2649 /*
2650 * Wired pages are not counted as "re-usable"
2651 * in "all_reusable" VM objects, so nothing
2652 * to do here.
2653 */
2654 } else if (mem->reusable) {
2655 /*
2656 * This page is not "re-usable" when it's
2657 * wired, so adjust its state and the
2658 * accounting.
2659 */
2660 vm_object_reuse_pages(mem->object,
2661 mem->offset,
2662 mem->offset+PAGE_SIZE_64,
2663 FALSE);
2664 }
2665 }
2666 assert(!mem->reusable);
2667
2668 if (!mem->private && !mem->fictitious && !mem->gobbled)
2669 vm_page_wire_count++;
2670 if (mem->gobbled)
2671 vm_page_gobble_count--;
2672 mem->gobbled = FALSE;
2673
2674 VM_CHECK_MEMORYSTATUS;
2675
2676 /*
2677 * ENCRYPTED SWAP:
2678 * The page could be encrypted, but
2679 * we don't have to decrypt it here
2680 * because we don't guarantee that the
2681 * data is actually valid at this point.
2682 * The page will get decrypted in
2683 * vm_fault_wire() if needed.
2684 */
2685 }
2686 assert(!mem->gobbled);
2687 mem->wire_count++;
2688 VM_PAGE_CHECK(mem);
2689 }
2690
2691 /*
2692 * vm_page_gobble:
2693 *
2694 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2695 *
2696 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2697 */
2698 void
2699 vm_page_gobble(
2700 register vm_page_t mem)
2701 {
2702 vm_page_lockspin_queues();
2703 VM_PAGE_CHECK(mem);
2704
2705 assert(!mem->gobbled);
2706 assert( !VM_PAGE_WIRED(mem));
2707
2708 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2709 if (!mem->private && !mem->fictitious)
2710 vm_page_wire_count++;
2711 }
2712 vm_page_gobble_count++;
2713 mem->gobbled = TRUE;
2714 vm_page_unlock_queues();
2715 }
2716
2717 /*
2718 * vm_page_unwire:
2719 *
2720 * Release one wiring of this page, potentially
2721 * enabling it to be paged again.
2722 *
2723 * The page's object and the page queues must be locked.
2724 */
2725 void
2726 vm_page_unwire(
2727 vm_page_t mem,
2728 boolean_t queueit)
2729 {
2730
2731 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2732
2733 VM_PAGE_CHECK(mem);
2734 assert(VM_PAGE_WIRED(mem));
2735 assert(mem->object != VM_OBJECT_NULL);
2736 #if DEBUG
2737 vm_object_lock_assert_exclusive(mem->object);
2738 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2739 #endif
2740 if (--mem->wire_count == 0) {
2741 assert(!mem->private && !mem->fictitious);
2742 vm_page_wire_count--;
2743 assert(mem->object->wired_page_count > 0);
2744 mem->object->wired_page_count--;
2745 assert(mem->object->resident_page_count >=
2746 mem->object->wired_page_count);
2747 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2748 OSAddAtomic(+1, &vm_page_purgeable_count);
2749 assert(vm_page_purgeable_wired_count > 0);
2750 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2751 }
2752 assert(!mem->laundry);
2753 assert(mem->object != kernel_object);
2754 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2755
2756 if (queueit == TRUE) {
2757 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2758 vm_page_deactivate(mem);
2759 } else {
2760 vm_page_activate(mem);
2761 }
2762 }
2763
2764 VM_CHECK_MEMORYSTATUS;
2765
2766 }
2767 VM_PAGE_CHECK(mem);
2768 }
2769
2770 /*
2771 * vm_page_deactivate:
2772 *
2773 * Returns the given page to the inactive list,
2774 * indicating that no physical maps have access
2775 * to this page. [Used by the physical mapping system.]
2776 *
2777 * The page queues must be locked.
2778 */
2779 void
2780 vm_page_deactivate(
2781 vm_page_t m)
2782 {
2783 vm_page_deactivate_internal(m, TRUE);
2784 }
2785
2786
2787 void
2788 vm_page_deactivate_internal(
2789 vm_page_t m,
2790 boolean_t clear_hw_reference)
2791 {
2792
2793 VM_PAGE_CHECK(m);
2794 assert(m->object != kernel_object);
2795 assert(m->phys_page != vm_page_guard_addr);
2796
2797 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2798 #if DEBUG
2799 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2800 #endif
2801 /*
2802 * This page is no longer very interesting. If it was
2803 * interesting (active or inactive/referenced), then we
2804 * clear the reference bit and (re)enter it in the
2805 * inactive queue. Note wired pages should not have
2806 * their reference bit cleared.
2807 */
2808 assert ( !(m->absent && !m->unusual));
2809
2810 if (m->gobbled) { /* can this happen? */
2811 assert( !VM_PAGE_WIRED(m));
2812
2813 if (!m->private && !m->fictitious)
2814 vm_page_wire_count--;
2815 vm_page_gobble_count--;
2816 m->gobbled = FALSE;
2817 }
2818 /*
2819 * if this page is currently on the pageout queue, we can't do the
2820 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2821 * and we can't remove it manually since we would need the object lock
2822 * (which is not required here) to decrement the activity_in_progress
2823 * reference which is held on the object while the page is in the pageout queue...
2824 * just let the normal laundry processing proceed
2825 */
2826 if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
2827 return;
2828
2829 if (!m->absent && clear_hw_reference == TRUE)
2830 pmap_clear_reference(m->phys_page);
2831
2832 m->reference = FALSE;
2833 m->no_cache = FALSE;
2834
2835 if (!m->inactive) {
2836 VM_PAGE_QUEUES_REMOVE(m);
2837
2838 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2839 m->dirty && m->object->internal &&
2840 (m->object->purgable == VM_PURGABLE_DENY ||
2841 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2842 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2843 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2844 m->throttled = TRUE;
2845 vm_page_throttled_count++;
2846 } else {
2847 if (m->object->named && m->object->ref_count == 1) {
2848 vm_page_speculate(m, FALSE);
2849 #if DEVELOPMENT || DEBUG
2850 vm_page_speculative_recreated++;
2851 #endif
2852 } else {
2853 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2854 }
2855 }
2856 }
2857 }
2858
2859 /*
2860 * vm_page_enqueue_cleaned
2861 *
2862 * Put the page on the cleaned queue, mark it cleaned, etc.
2863 * Being on the cleaned queue (and having m->clean_queue set)
2864 * does ** NOT ** guarantee that the page is clean!
2865 *
2866 * Call with the queues lock held.
2867 */
2868
2869 void vm_page_enqueue_cleaned(vm_page_t m)
2870 {
2871 assert(m->phys_page != vm_page_guard_addr);
2872 #if DEBUG
2873 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2874 #endif
2875 assert( !(m->absent && !m->unusual));
2876
2877 if (m->gobbled) {
2878 assert( !VM_PAGE_WIRED(m));
2879 if (!m->private && !m->fictitious)
2880 vm_page_wire_count--;
2881 vm_page_gobble_count--;
2882 m->gobbled = FALSE;
2883 }
2884 /*
2885 * if this page is currently on the pageout queue, we can't do the
2886 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2887 * and we can't remove it manually since we would need the object lock
2888 * (which is not required here) to decrement the activity_in_progress
2889 * reference which is held on the object while the page is in the pageout queue...
2890 * just let the normal laundry processing proceed
2891 */
2892 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2893 return;
2894
2895 VM_PAGE_QUEUES_REMOVE(m);
2896
2897 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2898 m->clean_queue = TRUE;
2899 vm_page_cleaned_count++;
2900
2901 m->inactive = TRUE;
2902 vm_page_inactive_count++;
2903 if (m->object->internal) {
2904 vm_page_pageable_internal_count++;
2905 } else {
2906 vm_page_pageable_external_count++;
2907 }
2908
2909 vm_pageout_enqueued_cleaned++;
2910 }
2911
2912 /*
2913 * vm_page_activate:
2914 *
2915 * Put the specified page on the active list (if appropriate).
2916 *
2917 * The page queues must be locked.
2918 */
2919
2920 #if CONFIG_JETSAM
2921 #if LATENCY_JETSAM
2922 extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
2923 #endif /* LATENCY_JETSAM */
2924 #endif /* CONFIG_JETSAM */
2925
2926 void
2927 vm_page_activate(
2928 register vm_page_t m)
2929 {
2930 VM_PAGE_CHECK(m);
2931 #ifdef FIXME_4778297
2932 assert(m->object != kernel_object);
2933 #endif
2934 assert(m->phys_page != vm_page_guard_addr);
2935 #if DEBUG
2936 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2937 #endif
2938 assert( !(m->absent && !m->unusual));
2939
2940 if (m->gobbled) {
2941 assert( !VM_PAGE_WIRED(m));
2942 if (!m->private && !m->fictitious)
2943 vm_page_wire_count--;
2944 vm_page_gobble_count--;
2945 m->gobbled = FALSE;
2946 }
2947 /*
2948 * if this page is currently on the pageout queue, we can't do the
2949 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2950 * and we can't remove it manually since we would need the object lock
2951 * (which is not required here) to decrement the activity_in_progress
2952 * reference which is held on the object while the page is in the pageout queue...
2953 * just let the normal laundry processing proceed
2954 */
2955 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
2956 return;
2957
2958 #if DEBUG
2959 if (m->active)
2960 panic("vm_page_activate: already active");
2961 #endif
2962
2963 if (m->speculative) {
2964 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2965 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2966 }
2967
2968 VM_PAGE_QUEUES_REMOVE(m);
2969
2970 if ( !VM_PAGE_WIRED(m)) {
2971
2972 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2973 m->dirty && m->object->internal &&
2974 (m->object->purgable == VM_PURGABLE_DENY ||
2975 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2976 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2977 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2978 m->throttled = TRUE;
2979 vm_page_throttled_count++;
2980 } else {
2981 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2982 m->active = TRUE;
2983 vm_page_active_count++;
2984 if (m->object->internal) {
2985 vm_page_pageable_internal_count++;
2986 } else {
2987 vm_page_pageable_external_count++;
2988 }
2989 #if LATENCY_JETSAM
2990 if (jlp_init) {
2991 uint64_t now = mach_absolute_time();
2992 uint64_t delta = now - jlp_time;
2993 clock_sec_t jl_secs = 0;
2994 clock_usec_t jl_usecs = 0;
2995 vm_page_t jlp;
2996
2997 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);
2998
2999 jl_usecs += jl_secs * USEC_PER_SEC;
3000 if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {
3001
3002 jlp = &jetsam_latency_page[jlp_current];
3003 if (jlp->active) {
3004 queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
3005 }
3006 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);
3007
3008 jlp->active = TRUE;
3009
3010 jlp->offset = now;
3011 jlp_time = jlp->offset;
3012
3013 if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
3014 jlp_current = 0;
3015 }
3016
3017 }
3018 }
3019 #endif /* LATENCY_JETSAM */
3020 }
3021 m->reference = TRUE;
3022 m->no_cache = FALSE;
3023 }
3024 VM_PAGE_CHECK(m);
3025 }
3026
3027
3028 /*
3029 * vm_page_speculate:
3030 *
3031 * Put the specified page on the speculative list (if appropriate).
3032 *
3033 * The page queues must be locked.
3034 */
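/*
 * A worked example of the aging arithmetic used below (illustrative; the
 * 500 ms figure is hypothetical, vm_page_speculative_q_age_ms is a tunable):
 *
 *	age_ts.tv_sec  = 500 / 1000                          = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000 ns
 *
 * That interval is added to the current system time to form the expiry of the
 * current aging bin; once the deadline passes, newly speculated pages start a
 * fresh bin and the bin being recycled, if not empty, is drained into the
 * aged queue via vm_page_speculate_ageit().
 */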
3035 void
3036 vm_page_speculate(
3037 vm_page_t m,
3038 boolean_t new)
3039 {
3040 struct vm_speculative_age_q *aq;
3041
3042 VM_PAGE_CHECK(m);
3043 assert(m->object != kernel_object);
3044 assert(m->phys_page != vm_page_guard_addr);
3045 #if DEBUG
3046 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3047 #endif
3048 assert( !(m->absent && !m->unusual));
3049
3050 /*
3051 * if this page is currently on the pageout queue, we can't do the
3052 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3053 * and we can't remove it manually since we would need the object lock
3054 * (which is not required here) to decrement the activity_in_progress
3055 * reference which is held on the object while the page is in the pageout queue...
3056 * just let the normal laundry processing proceed
3057 */
3058 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
3059 return;
3060
3061 VM_PAGE_QUEUES_REMOVE(m);
3062
3063 if ( !VM_PAGE_WIRED(m)) {
3064 mach_timespec_t ts;
3065 clock_sec_t sec;
3066 clock_nsec_t nsec;
3067
3068 clock_get_system_nanotime(&sec, &nsec);
3069 ts.tv_sec = (unsigned int) sec;
3070 ts.tv_nsec = nsec;
3071
3072 if (vm_page_speculative_count == 0) {
3073
3074 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3075 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3076
3077 aq = &vm_page_queue_speculative[speculative_age_index];
3078
3079 /*
3080 * set the timer to begin a new group
3081 */
3082 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3083 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3084
3085 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3086 } else {
3087 aq = &vm_page_queue_speculative[speculative_age_index];
3088
3089 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3090
3091 speculative_age_index++;
3092
3093 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3094 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3095 if (speculative_age_index == speculative_steal_index) {
3096 speculative_steal_index = speculative_age_index + 1;
3097
3098 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3099 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3100 }
3101 aq = &vm_page_queue_speculative[speculative_age_index];
3102
3103 if (!queue_empty(&aq->age_q))
3104 vm_page_speculate_ageit(aq);
3105
3106 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3107 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3108
3109 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3110 }
3111 }
3112 enqueue_tail(&aq->age_q, &m->pageq);
3113 m->speculative = TRUE;
3114 vm_page_speculative_count++;
3115 if (m->object->internal) {
3116 vm_page_pageable_internal_count++;
3117 } else {
3118 vm_page_pageable_external_count++;
3119 }
3120
3121 if (new == TRUE) {
3122 vm_object_lock_assert_exclusive(m->object);
3123
3124 m->object->pages_created++;
3125 #if DEVELOPMENT || DEBUG
3126 vm_page_speculative_created++;
3127 #endif
3128 }
3129 }
3130 VM_PAGE_CHECK(m);
3131 }
3132
3133
3134 /*
3135 * move pages from the specified aging bin to
3136 * the speculative bin that pageout_scan claims from
3137 *
3138 * The page queues must be locked.
3139 */
3140 void
3141 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3142 {
3143 struct vm_speculative_age_q *sq;
3144 vm_page_t t;
3145
3146 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3147
3148 if (queue_empty(&sq->age_q)) {
3149 sq->age_q.next = aq->age_q.next;
3150 sq->age_q.prev = aq->age_q.prev;
3151
3152 t = (vm_page_t)sq->age_q.next;
3153 t->pageq.prev = &sq->age_q;
3154
3155 t = (vm_page_t)sq->age_q.prev;
3156 t->pageq.next = &sq->age_q;
3157 } else {
3158 t = (vm_page_t)sq->age_q.prev;
3159 t->pageq.next = aq->age_q.next;
3160
3161 t = (vm_page_t)aq->age_q.next;
3162 t->pageq.prev = sq->age_q.prev;
3163
3164 t = (vm_page_t)aq->age_q.prev;
3165 t->pageq.next = &sq->age_q;
3166
3167 sq->age_q.prev = aq->age_q.prev;
3168 }
3169 queue_init(&aq->age_q);
3170 }
3171
3172
3173 void
3174 vm_page_lru(
3175 vm_page_t m)
3176 {
3177 VM_PAGE_CHECK(m);
3178 assert(m->object != kernel_object);
3179 assert(m->phys_page != vm_page_guard_addr);
3180
3181 #if DEBUG
3182 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3183 #endif
3184 /*
3185 * if this page is currently on the pageout queue, we can't do the
3186 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3187 * and we can't remove it manually since we would need the object lock
3188 * (which is not required here) to decrement the activity_in_progress
3189 * reference which is held on the object while the page is in the pageout queue...
3190 * just let the normal laundry processing proceed
3191 */
3192 if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3193 return;
3194
3195 m->no_cache = FALSE;
3196
3197 VM_PAGE_QUEUES_REMOVE(m);
3198
3199 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3200 }
3201
3202
3203 void
3204 vm_page_reactivate_all_throttled(void)
3205 {
3206 vm_page_t first_throttled, last_throttled;
3207 vm_page_t first_active;
3208 vm_page_t m;
3209 int extra_active_count;
3210 int extra_internal_count, extra_external_count;
3211
3212 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3213 return;
3214
3215 extra_active_count = 0;
3216 extra_internal_count = 0;
3217 extra_external_count = 0;
3218 vm_page_lock_queues();
3219 if (! queue_empty(&vm_page_queue_throttled)) {
3220 /*
3221 * Switch "throttled" pages to "active".
3222 */
3223 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3224 VM_PAGE_CHECK(m);
3225 assert(m->throttled);
3226 assert(!m->active);
3227 assert(!m->inactive);
3228 assert(!m->speculative);
3229 assert(!VM_PAGE_WIRED(m));
3230
3231 extra_active_count++;
3232 if (m->object->internal) {
3233 extra_internal_count++;
3234 } else {
3235 extra_external_count++;
3236 }
3237
3238 m->throttled = FALSE;
3239 m->active = TRUE;
3240 VM_PAGE_CHECK(m);
3241 }
3242
3243 /*
3244 * Transfer the entire throttled queue to the regular LRU page queues.
3245 * We insert it at the head of the active queue, so that these pages
3246 * get re-evaluated by the LRU algorithm first, since they've been
3247 * completely out of it until now.
3248 */
3249 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3250 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3251 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3252 if (queue_empty(&vm_page_queue_active)) {
3253 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3254 } else {
3255 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3256 }
3257 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3258 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3259 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3260
3261 #if DEBUG
3262 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3263 #endif
3264 queue_init(&vm_page_queue_throttled);
3265 /*
3266 * Adjust the global page counts.
3267 */
3268 vm_page_active_count += extra_active_count;
3269 vm_page_pageable_internal_count += extra_internal_count;
3270 vm_page_pageable_external_count += extra_external_count;
3271 vm_page_throttled_count = 0;
3272 }
3273 assert(vm_page_throttled_count == 0);
3274 assert(queue_empty(&vm_page_queue_throttled));
3275 vm_page_unlock_queues();
3276 }
3277
3278
3279 /*
3280 * move pages from the indicated local queue to the global active queue
3281 * it's ok to fail if we're below the hard limit and force == FALSE
3282 * the nolocks == TRUE case is to allow this function to be run on
3283 * the hibernate path
3284 */
3285
3286 void
3287 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3288 {
3289 struct vpl *lq;
3290 vm_page_t first_local, last_local;
3291 vm_page_t first_active;
3292 vm_page_t m;
3293 uint32_t count = 0;
3294
3295 if (vm_page_local_q == NULL)
3296 return;
3297
3298 lq = &vm_page_local_q[lid].vpl_un.vpl;
3299
3300 if (nolocks == FALSE) {
3301 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3302 if ( !vm_page_trylockspin_queues())
3303 return;
3304 } else
3305 vm_page_lockspin_queues();
3306
3307 VPL_LOCK(&lq->vpl_lock);
3308 }
3309 if (lq->vpl_count) {
3310 /*
3311 * Switch "local" pages to "active".
3312 */
3313 assert(!queue_empty(&lq->vpl_queue));
3314
3315 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3316 VM_PAGE_CHECK(m);
3317 assert(m->local);
3318 assert(!m->active);
3319 assert(!m->inactive);
3320 assert(!m->speculative);
3321 assert(!VM_PAGE_WIRED(m));
3322 assert(!m->throttled);
3323 assert(!m->fictitious);
3324
3325 if (m->local_id != lid)
3326 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3327
3328 m->local_id = 0;
3329 m->local = FALSE;
3330 m->active = TRUE;
3331 VM_PAGE_CHECK(m);
3332
3333 count++;
3334 }
3335 if (count != lq->vpl_count)
3336 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3337
3338 /*
3339 * Transfer the entire local queue to the regular LRU page queues.
3340 */
3341 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3342 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3343 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3344
3345 if (queue_empty(&vm_page_queue_active)) {
3346 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3347 } else {
3348 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3349 }
3350 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3351 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3352 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3353
3354 queue_init(&lq->vpl_queue);
3355 /*
3356 * Adjust the global page counts.
3357 */
3358 vm_page_active_count += lq->vpl_count;
3359 vm_page_pageable_internal_count += lq->vpl_internal_count;
3360 vm_page_pageable_external_count += lq->vpl_external_count;
3361 lq->vpl_count = 0;
3362 lq->vpl_internal_count = 0;
3363 lq->vpl_external_count = 0;
3364 }
3365 assert(queue_empty(&lq->vpl_queue));
3366
3367 if (nolocks == FALSE) {
3368 VPL_UNLOCK(&lq->vpl_lock);
3369 vm_page_unlock_queues();
3370 }
3371 }
3372
3373 /*
3374 * vm_page_part_zero_fill:
3375 *
3376 * Zero-fill a part of the page.
3377 */
3378 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3379 void
3380 vm_page_part_zero_fill(
3381 vm_page_t m,
3382 vm_offset_t m_pa,
3383 vm_size_t len)
3384 {
3385
3386 #if 0
3387 /*
3388 * we don't hold the page queue lock
3389 * so this check isn't safe to make
3390 */
3391 VM_PAGE_CHECK(m);
3392 #endif
3393
3394 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3395 pmap_zero_part_page(m->phys_page, m_pa, len);
3396 #else
3397 vm_page_t tmp;
3398 while (1) {
3399 tmp = vm_page_grab();
3400 if (tmp == VM_PAGE_NULL) {
3401 vm_page_wait(THREAD_UNINT);
3402 continue;
3403 }
3404 break;
3405 }
3406 vm_page_zero_fill(tmp);
3407 if(m_pa != 0) {
3408 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3409 }
3410 if((m_pa + len) < PAGE_SIZE) {
3411 vm_page_part_copy(m, m_pa + len, tmp,
3412 m_pa + len, PAGE_SIZE - (m_pa + len));
3413 }
3414 vm_page_copy(tmp,m);
3415 VM_PAGE_FREE(tmp);
3416 #endif
3417
3418 }
3419
3420 /*
3421 * vm_page_zero_fill:
3422 *
3423 * Zero-fill the specified page.
3424 */
3425 void
3426 vm_page_zero_fill(
3427 vm_page_t m)
3428 {
3429 XPR(XPR_VM_PAGE,
3430 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3431 m->object, m->offset, m, 0,0);
3432 #if 0
3433 /*
3434 * we don't hold the page queue lock
3435 * so this check isn't safe to make
3436 */
3437 VM_PAGE_CHECK(m);
3438 #endif
3439
3440 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3441 pmap_zero_page(m->phys_page);
3442 }
3443
3444 /*
3445 * vm_page_part_copy:
3446 *
3447 * copy part of one page to another
3448 */
3449
3450 void
3451 vm_page_part_copy(
3452 vm_page_t src_m,
3453 vm_offset_t src_pa,
3454 vm_page_t dst_m,
3455 vm_offset_t dst_pa,
3456 vm_size_t len)
3457 {
3458 #if 0
3459 /*
3460 * we don't hold the page queue lock
3461 * so this check isn't safe to make
3462 */
3463 VM_PAGE_CHECK(src_m);
3464 VM_PAGE_CHECK(dst_m);
3465 #endif
3466 pmap_copy_part_page(src_m->phys_page, src_pa,
3467 dst_m->phys_page, dst_pa, len);
3468 }
3469
3470 /*
3471 * vm_page_copy:
3472 *
3473 * Copy one page to another
3474 *
3475 * ENCRYPTED SWAP:
3476 * The source page should not be encrypted. The caller should
3477 * make sure the page is decrypted first, if necessary.
3478 */
3479
3480 int vm_page_copy_cs_validations = 0;
3481 int vm_page_copy_cs_tainted = 0;
3482
3483 void
3484 vm_page_copy(
3485 vm_page_t src_m,
3486 vm_page_t dest_m)
3487 {
3488 XPR(XPR_VM_PAGE,
3489 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3490 src_m->object, src_m->offset,
3491 dest_m->object, dest_m->offset,
3492 0);
3493 #if 0
3494 /*
3495 * we don't hold the page queue lock
3496 * so this check isn't safe to make
3497 */
3498 VM_PAGE_CHECK(src_m);
3499 VM_PAGE_CHECK(dest_m);
3500 #endif
3501 vm_object_lock_assert_held(src_m->object);
3502
3503 /*
3504 * ENCRYPTED SWAP:
3505 * The source page should not be encrypted at this point.
3506 * The destination page will therefore not contain encrypted
3507 * data after the copy.
3508 */
3509 if (src_m->encrypted) {
3510 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3511 }
3512 dest_m->encrypted = FALSE;
3513
3514 if (src_m->object != VM_OBJECT_NULL &&
3515 src_m->object->code_signed) {
3516 /*
3517 * We're copying a page from a code-signed object.
3518 * Whoever ends up mapping the copy page might care about
3519 * the original page's integrity, so let's validate the
3520 * source page now.
3521 */
3522 vm_page_copy_cs_validations++;
3523 vm_page_validate_cs(src_m);
3524 }
3525
3526 if (vm_page_is_slideable(src_m)) {
3527 boolean_t was_busy = src_m->busy;
3528 src_m->busy = TRUE;
3529 (void) vm_page_slide(src_m, 0);
3530 assert(src_m->busy);
3531 if (!was_busy) {
3532 PAGE_WAKEUP_DONE(src_m);
3533 }
3534 }
3535
3536 /*
3537 * Propagate the cs_tainted bit to the copy page. Do not propagate
3538 * the cs_validated bit.
3539 */
3540 dest_m->cs_tainted = src_m->cs_tainted;
3541 if (dest_m->cs_tainted) {
3542 vm_page_copy_cs_tainted++;
3543 }
3544 dest_m->slid = src_m->slid;
3545 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3546 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3547 }
3548
3549 #if MACH_ASSERT
3550 static void
3551 _vm_page_print(
3552 vm_page_t p)
3553 {
3554 printf("vm_page %p: \n", p);
3555 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3556 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3557 printf(" next=%p\n", p->next);
3558 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3559 printf(" wire_count=%u\n", p->wire_count);
3560
3561 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3562 (p->local ? "" : "!"),
3563 (p->inactive ? "" : "!"),
3564 (p->active ? "" : "!"),
3565 (p->pageout_queue ? "" : "!"),
3566 (p->speculative ? "" : "!"),
3567 (p->laundry ? "" : "!"));
3568 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3569 (p->free ? "" : "!"),
3570 (p->reference ? "" : "!"),
3571 (p->gobbled ? "" : "!"),
3572 (p->private ? "" : "!"),
3573 (p->throttled ? "" : "!"));
3574 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3575 (p->busy ? "" : "!"),
3576 (p->wanted ? "" : "!"),
3577 (p->tabled ? "" : "!"),
3578 (p->fictitious ? "" : "!"),
3579 (p->pmapped ? "" : "!"),
3580 (p->wpmapped ? "" : "!"));
3581 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3582 (p->pageout ? "" : "!"),
3583 (p->absent ? "" : "!"),
3584 (p->error ? "" : "!"),
3585 (p->dirty ? "" : "!"),
3586 (p->cleaning ? "" : "!"),
3587 (p->precious ? "" : "!"),
3588 (p->clustered ? "" : "!"));
3589 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3590 (p->overwriting ? "" : "!"),
3591 (p->restart ? "" : "!"),
3592 (p->unusual ? "" : "!"),
3593 (p->encrypted ? "" : "!"),
3594 (p->encrypted_cleaning ? "" : "!"));
3595 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3596 (p->cs_validated ? "" : "!"),
3597 (p->cs_tainted ? "" : "!"),
3598 (p->no_cache ? "" : "!"));
3599
3600 printf("phys_page=0x%x\n", p->phys_page);
3601 }
3602
3603 /*
3604 * Check that the list of pages is ordered by
3605 * ascending physical address and has no holes.
3606 */
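/*
 * For example (illustrative): a valid 4-page run whose first page has
 * phys_page 0x1000 must be chained via NEXT_PAGE() through pages whose
 * phys_page values are 0x1001, 0x1002 and 0x1003, in that order; any gap,
 * reordering, or wrong count panics.
 */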
3607 static int
3608 vm_page_verify_contiguous(
3609 vm_page_t pages,
3610 unsigned int npages)
3611 {
3612 register vm_page_t m;
3613 unsigned int page_count;
3614 vm_offset_t prev_addr;
3615
3616 prev_addr = pages->phys_page;
3617 page_count = 1;
3618 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3619 if (m->phys_page != prev_addr + 1) {
3620 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3621 m, (long)prev_addr, m->phys_page);
3622 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3623 panic("vm_page_verify_contiguous: not contiguous!");
3624 }
3625 prev_addr = m->phys_page;
3626 ++page_count;
3627 }
3628 if (page_count != npages) {
3629 printf("pages %p actual count 0x%x but requested 0x%x\n",
3630 pages, page_count, npages);
3631 panic("vm_page_verify_contiguous: count error");
3632 }
3633 return 1;
3634 }
3635
3636
3637 /*
3638 * Check the free lists for proper length etc.
3639 */
3640 static unsigned int
3641 vm_page_verify_free_list(
3642 queue_head_t *vm_page_queue,
3643 unsigned int color,
3644 vm_page_t look_for_page,
3645 boolean_t expect_page)
3646 {
3647 unsigned int npages;
3648 vm_page_t m;
3649 vm_page_t prev_m;
3650 boolean_t found_page;
3651
3652 found_page = FALSE;
3653 npages = 0;
3654 prev_m = (vm_page_t) vm_page_queue;
3655 queue_iterate(vm_page_queue,
3656 m,
3657 vm_page_t,
3658 pageq) {
3659
3660 if (m == look_for_page) {
3661 found_page = TRUE;
3662 }
3663 if ((vm_page_t) m->pageq.prev != prev_m)
3664 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3665 color, npages, m, m->pageq.prev, prev_m);
3666 if ( ! m->busy )
3667 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3668 color, npages, m);
3669 if (color != (unsigned int) -1) {
3670 if ((m->phys_page & vm_color_mask) != color)
3671 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3672 color, npages, m, m->phys_page & vm_color_mask, color);
3673 if ( ! m->free )
3674 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3675 color, npages, m);
3676 }
3677 ++npages;
3678 prev_m = m;
3679 }
3680 if (look_for_page != VM_PAGE_NULL) {
3681 unsigned int other_color;
3682
3683 if (expect_page && !found_page) {
3684 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3685 color, npages, look_for_page, look_for_page->phys_page);
3686 _vm_page_print(look_for_page);
3687 for (other_color = 0;
3688 other_color < vm_colors;
3689 other_color++) {
3690 if (other_color == color)
3691 continue;
3692 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3693 other_color, look_for_page, FALSE);
3694 }
3695 if (color == (unsigned int) -1) {
3696 vm_page_verify_free_list(&vm_lopage_queue_free,
3697 (unsigned int) -1, look_for_page, FALSE);
3698 }
3699 panic("vm_page_verify_free_list(color=%u)\n", color);
3700 }
3701 if (!expect_page && found_page) {
3702 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3703 color, npages, look_for_page, look_for_page->phys_page);
3704 }
3705 }
3706 return npages;
3707 }
3708
3709 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3710 static void
3711 vm_page_verify_free_lists( void )
3712 {
3713 unsigned int color, npages, nlopages;
3714
3715 if (! vm_page_verify_free_lists_enabled)
3716 return;
3717
3718 npages = 0;
3719
3720 lck_mtx_lock(&vm_page_queue_free_lock);
3721
3722 for( color = 0; color < vm_colors; color++ ) {
3723 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3724 color, VM_PAGE_NULL, FALSE);
3725 }
3726 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3727 (unsigned int) -1,
3728 VM_PAGE_NULL, FALSE);
3729 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3730 panic("vm_page_verify_free_lists: "
3731 "npages %u free_count %d nlopages %u lo_free_count %u",
3732 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3733
3734 lck_mtx_unlock(&vm_page_queue_free_lock);
3735 }
3736
3737 void
3738 vm_page_queues_assert(
3739 vm_page_t mem,
3740 int val)
3741 {
3742 #if DEBUG
3743 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3744 #endif
3745 if (mem->free + mem->active + mem->inactive + mem->speculative +
3746 mem->throttled + mem->pageout_queue > (val)) {
3747 _vm_page_print(mem);
3748 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3749 }
3750 if (VM_PAGE_WIRED(mem)) {
3751 assert(!mem->active);
3752 assert(!mem->inactive);
3753 assert(!mem->speculative);
3754 assert(!mem->throttled);
3755 assert(!mem->pageout_queue);
3756 }
3757 }
3758 #endif /* MACH_ASSERT */
3759
3760
3761 /*
3762 * CONTIGUOUS PAGE ALLOCATION
3763 *
3764 * Find a region large enough to contain at least n pages
3765 * of contiguous physical memory.
3766 *
3767 * This is done by traversing the vm_page_t array in a linear fashion
3768 * we assume that the vm_page_t array has the available physical pages in an
3769 * ordered, ascending list... this is currently true of all our implementations
3770 * and must remain so... there can be 'holes' in the array... we also can
3771 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3772 * which used to happen via 'vm_page_convert'... that function was no longer
3773 * being called and was removed...
3774 *
3775 * The basic flow consists of stabilizing some of the interesting state of
3776 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3777 * sweep at the beginning of the array looking for pages that meet our criterea
3778 * for a 'stealable' page... currently we are pretty conservative... if the page
3779 * meets this criterea and is physically contiguous to the previous page in the 'run'
3780 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3781 * and start to develop a new run... if at this point we've already considered
3782 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3783 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3784 * to other threads trying to acquire free pages (or move pages from q to q),
3785 * and then continue from the spot we left off... we only make 1 pass through the
3786 * array. Once we have a 'run' that is long enough, we'll go into the loop
3787 * which steals the pages from the queues they're currently on... pages on the free
3788 * queue can be stolen directly... pages that are on any of the other queues
3789 * must be removed from the object they are tabled on... this requires taking the
3790 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3791 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3792 * dump the pages we've currently stolen back to the free list, and pick up our
3793 * scan from the point where we aborted the 'current' run.
3794 *
3795 *
3796 * Requirements:
3797 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3798 *
3799 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3800 *
3801 * Algorithm:
3802 */
3803
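/*
 * Illustrative sketch only (not part of the original file): the run-detection
 * idea described above, reduced to a plain scan over an array of ascending
 * physical page numbers.  The function and variable names are hypothetical;
 * the real code below must additionally stabilize each page's state behind
 * the vm_page_queue and vm_page_free locks, yield periodically, and then
 * steal the pages that make up the run.
 */
#if 0 /* example only, not compiled */
static int
find_contig_run_example(const ppnum_t *pnums, unsigned int count,
	unsigned int want, ppnum_t align_mask)
{
	int		start = -1;
	unsigned int	npages = 0;
	unsigned int	i;
	ppnum_t		prev = 0;

	for (i = 0; i < count && npages < want; i++) {
		ppnum_t pnum = pnums[i];

		if (npages && pnum == prev + 1) {
			/* physically contiguous with the previous page: grow the run */
			npages++;
		} else if ((pnum & align_mask) == 0) {
			/* hole (or no run yet) but a suitably aligned start: new run */
			start = (int) i;
			npages = 1;
		} else {
			/* hole and a misaligned start: no run in progress */
			start = -1;
			npages = 0;
		}
		prev = pnum;
	}
	return (npages == want) ? start : -1;
}
#endif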
3804 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3805
3806
3807 #define RESET_STATE_OF_RUN() \
3808 MACRO_BEGIN \
3809 prevcontaddr = -2; \
3810 start_pnum = -1; \
3811 free_considered = 0; \
3812 substitute_needed = 0; \
3813 npages = 0; \
3814 MACRO_END
3815
3816 /*
3817 * Can we steal in-use (i.e. not free) pages when searching for
3818 * physically-contiguous pages ?
3819 */
3820 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3821
3822 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3823 #if DEBUG
3824 int vm_page_find_contig_debug = 0;
3825 #endif
3826
3827 static vm_page_t
3828 vm_page_find_contiguous(
3829 unsigned int contig_pages,
3830 ppnum_t max_pnum,
3831 ppnum_t pnum_mask,
3832 boolean_t wire,
3833 int flags)
3834 {
3835 vm_page_t m = NULL;
3836 ppnum_t prevcontaddr;
3837 ppnum_t start_pnum;
3838 unsigned int npages, considered, scanned;
3839 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3840 unsigned int idx_last_contig_page_found = 0;
3841 int free_considered, free_available;
3842 int substitute_needed;
3843 boolean_t wrapped;
3844 #if DEBUG
3845 clock_sec_t tv_start_sec, tv_end_sec;
3846 clock_usec_t tv_start_usec, tv_end_usec;
3847 #endif
3848 #if MACH_ASSERT
3849 int yielded = 0;
3850 int dumped_run = 0;
3851 int stolen_pages = 0;
3852 int compressed_pages = 0;
3853 #endif
3854
3855 if (contig_pages == 0)
3856 return VM_PAGE_NULL;
3857
3858 #if MACH_ASSERT
3859 vm_page_verify_free_lists();
3860 #endif
3861 #if DEBUG
3862 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3863 #endif
3864 PAGE_REPLACEMENT_ALLOWED(TRUE);
3865
3866 vm_page_lock_queues();
3867 lck_mtx_lock(&vm_page_queue_free_lock);
3868
3869 RESET_STATE_OF_RUN();
3870
3871 scanned = 0;
3872 considered = 0;
3873 free_available = vm_page_free_count - vm_page_free_reserved;
3874
3875 wrapped = FALSE;
3876
3877 if(flags & KMA_LOMEM)
3878 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3879 else
3880 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3881
3882 orig_last_idx = idx_last_contig_page_found;
3883 last_idx = orig_last_idx;
3884
3885 for (page_idx = last_idx, start_idx = last_idx;
3886 npages < contig_pages && page_idx < vm_pages_count;
3887 page_idx++) {
3888 retry:
3889 if (wrapped &&
3890 npages == 0 &&
3891 page_idx >= orig_last_idx) {
3892 /*
3893 * We're back where we started and we haven't
3894 * found any suitable contiguous range. Let's
3895 * give up.
3896 */
3897 break;
3898 }
3899 scanned++;
3900 m = &vm_pages[page_idx];
3901
3902 assert(!m->fictitious);
3903 assert(!m->private);
3904
3905 if (max_pnum && m->phys_page > max_pnum) {
3906 /* no more low pages... */
3907 break;
3908 }
3909 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3910 /*
3911 * not aligned
3912 */
3913 RESET_STATE_OF_RUN();
3914
3915 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3916 m->encrypted_cleaning ||
3917 m->pageout_queue || m->laundry || m->wanted ||
3918 m->cleaning || m->overwriting || m->pageout) {
3919 /*
3920 * page is in a transient state
3921 * or a state we don't want to deal
3922 * with, so don't consider it which
3923 * means starting a new run
3924 */
3925 RESET_STATE_OF_RUN();
3926
3927 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
3928 /*
3929 * page needs to be on one of our queues
3930 * or it needs to belong to the compressor pool
3931 * in order for it to be stable behind the
3932 * locks we hold at this point...
3933 * if not, don't consider it which
3934 * means starting a new run
3935 */
3936 RESET_STATE_OF_RUN();
3937
3938 } else if (!m->free && (!m->tabled || m->busy)) {
3939 /*
3940 * pages on the free list are always 'busy'
3941 * so we couldn't test for 'busy' in the check
3942 * for the transient states... pages that are
3943 * 'free' are never 'tabled', so we also couldn't
3944 * test for 'tabled'. So we check here to make
3945 * sure that a non-free page is not busy and is
3946 * tabled on an object...
3947 * if not, don't consider it which
3948 * means starting a new run
3949 */
3950 RESET_STATE_OF_RUN();
3951
3952 } else {
3953 if (m->phys_page != prevcontaddr + 1) {
3954 if ((m->phys_page & pnum_mask) != 0) {
3955 RESET_STATE_OF_RUN();
3956 goto did_consider;
3957 } else {
3958 npages = 1;
3959 start_idx = page_idx;
3960 start_pnum = m->phys_page;
3961 }
3962 } else {
3963 npages++;
3964 }
3965 prevcontaddr = m->phys_page;
3966
3967 VM_PAGE_CHECK(m);
3968 if (m->free) {
3969 free_considered++;
3970 } else {
3971 /*
3972 * This page is not free.
3973 * If we can't steal used pages,
3974 * we have to give up this run
3975 * and keep looking.
3976 * Otherwise, we might need to
3977 * move the contents of this page
3978 * into a substitute page.
3979 */
3980 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3981 if (m->pmapped || m->dirty || m->precious) {
3982 substitute_needed++;
3983 }
3984 #else
3985 RESET_STATE_OF_RUN();
3986 #endif
3987 }
3988
3989 if ((free_considered + substitute_needed) > free_available) {
3990 /*
3991 * if we let this run continue
3992 * we will end up dropping the vm_page_free_count
3993 * below the reserve limit... we need to abort
3994 * this run, but we can at least re-consider this
3995 * page... thus the jump back to 'retry'
3996 */
3997 RESET_STATE_OF_RUN();
3998
3999 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4000 considered++;
4001 goto retry;
4002 }
4003 /*
4004 * free_available == 0
4005 * so can't consider any free pages... if
4006 * we went to retry in this case, we'd
4007 * get stuck looking at the same page
4008 * w/o making any forward progress
4009 * we also want to take this path if we've already
4010 * reached our limit that controls the lock latency
4011 */
4012 }
4013 }
4014 did_consider:
4015 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4016
4017 PAGE_REPLACEMENT_ALLOWED(FALSE);
4018
4019 lck_mtx_unlock(&vm_page_queue_free_lock);
4020 vm_page_unlock_queues();
4021
4022 mutex_pause(0);
4023
4024 PAGE_REPLACEMENT_ALLOWED(TRUE);
4025
4026 vm_page_lock_queues();
4027 lck_mtx_lock(&vm_page_queue_free_lock);
4028
4029 RESET_STATE_OF_RUN();
4030 /*
4031 * reset our free page limit since we
4032 * dropped the lock protecting the vm_page_free_queue
4033 */
4034 free_available = vm_page_free_count - vm_page_free_reserved;
4035 considered = 0;
4036 #if MACH_ASSERT
4037 yielded++;
4038 #endif
4039 goto retry;
4040 }
4041 considered++;
4042 }
4043 m = VM_PAGE_NULL;
4044
4045 if (npages != contig_pages) {
4046 if (!wrapped) {
4047 /*
4048 * We didn't find a contiguous range but we didn't
4049 * start from the very first page.
4050 * Start again from the very first page.
4051 */
4052 RESET_STATE_OF_RUN();
4053 if( flags & KMA_LOMEM)
4054 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4055 else
4056 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4057 last_idx = 0;
4058 page_idx = last_idx;
4059 wrapped = TRUE;
4060 goto retry;
4061 }
4062 lck_mtx_unlock(&vm_page_queue_free_lock);
4063 } else {
4064 vm_page_t m1;
4065 vm_page_t m2;
4066 unsigned int cur_idx;
4067 unsigned int tmp_start_idx;
4068 vm_object_t locked_object = VM_OBJECT_NULL;
4069 boolean_t abort_run = FALSE;
4070
4071 assert(page_idx - start_idx == contig_pages);
4072
4073 tmp_start_idx = start_idx;
4074
4075 /*
4076 * first pass through to pull the free pages
4077 * off of the free queue so that in case we
4078 * need substitute pages, we won't grab any
4079 * of the free pages in the run... the 'free' bit
4080 * is cleared right here in this pass (see below), and even in
4081 * an abort_run case, we'll collect all of the
4082 * free pages in this run and return them to the free list
4083 */
4084 while (start_idx < page_idx) {
4085
4086 m1 = &vm_pages[start_idx++];
4087
4088 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4089 assert(m1->free);
4090 #endif
4091
4092 if (m1->free) {
4093 unsigned int color;
4094
4095 color = m1->phys_page & vm_color_mask;
4096 #if MACH_ASSERT
4097 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4098 #endif
4099 queue_remove(&vm_page_queue_free[color],
4100 m1,
4101 vm_page_t,
4102 pageq);
4103 m1->pageq.next = NULL;
4104 m1->pageq.prev = NULL;
4105 #if MACH_ASSERT
4106 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4107 #endif
4108 /*
4109 * Clear the "free" bit so that this page
4110 * does not get considered for another
4111 * concurrent physically-contiguous allocation.
4112 */
4113 m1->free = FALSE;
4114 assert(m1->busy);
4115
4116 vm_page_free_count--;
4117 }
4118 }
4119 /*
4120 * adjust global freelist counts
4121 */
4122 if (vm_page_free_count < vm_page_free_count_minimum)
4123 vm_page_free_count_minimum = vm_page_free_count;
4124
4125 if( flags & KMA_LOMEM)
4126 vm_page_lomem_find_contiguous_last_idx = page_idx;
4127 else
4128 vm_page_find_contiguous_last_idx = page_idx;
4129
4130 /*
4131 * we can drop the free queue lock at this point since
4132 * we've pulled any 'free' candidates off of the list
4133 * we need it dropped so that we can do a vm_page_grab
4134 * when substituting for pmapped/dirty pages
4135 */
4136 lck_mtx_unlock(&vm_page_queue_free_lock);
4137
4138 start_idx = tmp_start_idx;
4139 cur_idx = page_idx - 1;
4140
4141 while (start_idx++ < page_idx) {
4142 /*
4143 * must go through the list from back to front
4144 * so that the page list is created in the
4145 * correct order - low -> high phys addresses
4146 */
4147 m1 = &vm_pages[cur_idx--];
4148
4149 assert(!m1->free);
4150
4151 if (m1->object == VM_OBJECT_NULL) {
4152 /*
4153 * page has already been removed from
4154 * the free list in the 1st pass
4155 */
4156 assert(m1->offset == (vm_object_offset_t) -1);
4157 assert(m1->busy);
4158 assert(!m1->wanted);
4159 assert(!m1->laundry);
4160 } else {
4161 vm_object_t object;
4162 int refmod;
4163 boolean_t disconnected, reusable;
4164
4165 if (abort_run == TRUE)
4166 continue;
4167
4168 object = m1->object;
4169
4170 if (object != locked_object) {
4171 if (locked_object) {
4172 vm_object_unlock(locked_object);
4173 locked_object = VM_OBJECT_NULL;
4174 }
4175 if (vm_object_lock_try(object))
4176 locked_object = object;
4177 }
4178 if (locked_object == VM_OBJECT_NULL ||
4179 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4180 m1->encrypted_cleaning ||
4181 m1->pageout_queue || m1->laundry || m1->wanted ||
4182 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4183
4184 if (locked_object) {
4185 vm_object_unlock(locked_object);
4186 locked_object = VM_OBJECT_NULL;
4187 }
4188 tmp_start_idx = cur_idx;
4189 abort_run = TRUE;
4190 continue;
4191 }
4192
4193 disconnected = FALSE;
4194 reusable = FALSE;
4195
4196 if ((m1->reusable ||
4197 m1->object->all_reusable) &&
4198 m1->inactive &&
4199 !m1->dirty &&
4200 !m1->reference) {
4201 /* reusable page... */
4202 refmod = pmap_disconnect(m1->phys_page);
4203 disconnected = TRUE;
4204 if (refmod == 0) {
4205 /*
4206 * ... not reused: can steal
4207 * without relocating contents.
4208 */
4209 reusable = TRUE;
4210 }
4211 }
4212
4213 if ((m1->pmapped &&
4214 ! reusable) ||
4215 m1->dirty ||
4216 m1->precious) {
4217 vm_object_offset_t offset;
4218
4219 m2 = vm_page_grab();
4220
4221 if (m2 == VM_PAGE_NULL) {
4222 if (locked_object) {
4223 vm_object_unlock(locked_object);
4224 locked_object = VM_OBJECT_NULL;
4225 }
4226 tmp_start_idx = cur_idx;
4227 abort_run = TRUE;
4228 continue;
4229 }
4230 if (! disconnected) {
4231 if (m1->pmapped)
4232 refmod = pmap_disconnect(m1->phys_page);
4233 else
4234 refmod = 0;
4235 }
4236
4237 /* copy the page's contents */
4238 pmap_copy_page(m1->phys_page, m2->phys_page);
4239 /* copy the page's state */
4240 assert(!VM_PAGE_WIRED(m1));
4241 assert(!m1->free);
4242 assert(!m1->pageout_queue);
4243 assert(!m1->laundry);
4244 m2->reference = m1->reference;
4245 assert(!m1->gobbled);
4246 assert(!m1->private);
4247 m2->no_cache = m1->no_cache;
4248 m2->xpmapped = m1->xpmapped;
4249 assert(!m1->busy);
4250 assert(!m1->wanted);
4251 assert(!m1->fictitious);
4252 m2->pmapped = m1->pmapped; /* should flush cache ? */
4253 m2->wpmapped = m1->wpmapped;
4254 assert(!m1->pageout);
4255 m2->absent = m1->absent;
4256 m2->error = m1->error;
4257 m2->dirty = m1->dirty;
4258 assert(!m1->cleaning);
4259 m2->precious = m1->precious;
4260 m2->clustered = m1->clustered;
4261 assert(!m1->overwriting);
4262 m2->restart = m1->restart;
4263 m2->unusual = m1->unusual;
4264 m2->encrypted = m1->encrypted;
4265 assert(!m1->encrypted_cleaning);
4266 m2->cs_validated = m1->cs_validated;
4267 m2->cs_tainted = m1->cs_tainted;
4268
4269 /*
4270 * If m1 had really been reusable,
4271 * we would have just stolen it, so
4272 * let's not propagate its "reusable"
4273 * bit and assert that m2 is not
4274 * marked as "reusable".
4275 */
4276 // m2->reusable = m1->reusable;
4277 assert(!m2->reusable);
4278
4279 assert(!m1->lopage);
4280 m2->slid = m1->slid;
4281 m2->was_dirty = m1->was_dirty;
4282 m2->compressor = m1->compressor;
4283
4284 /*
4285 * make sure we clear the ref/mod state
4286 * from the pmap layer... else we risk
4287 * inheriting state from the last time
4288 * this page was used...
4289 */
4290 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4291
4292 if (refmod & VM_MEM_REFERENCED)
4293 m2->reference = TRUE;
4294 if (refmod & VM_MEM_MODIFIED) {
4295 SET_PAGE_DIRTY(m2, TRUE);
4296 }
4297 offset = m1->offset;
4298
4299 /*
4300 * completely cleans up the state
4301 * of the page so that it is ready
4302 * to be put onto the free list, or
4303 * for this purpose it looks like it
4304 * just came off of the free list
4305 */
4306 vm_page_free_prepare(m1);
4307
4308 /*
4309 * now put the substitute page
4310 * on the object
4311 */
4312 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4313
4314 if (m2->compressor) {
4315 m2->pmapped = TRUE;
4316 m2->wpmapped = TRUE;
4317
4318 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4319 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4320 #if MACH_ASSERT
4321 compressed_pages++;
4322 #endif
4323 } else {
4324 if (m2->reference)
4325 vm_page_activate(m2);
4326 else
4327 vm_page_deactivate(m2);
4328 }
4329 PAGE_WAKEUP_DONE(m2);
4330
4331 } else {
4332 assert(!m1->compressor);
4333
4334 /*
4335 * completely cleans up the state
4336 * of the page so that it is ready
4337 * to be put onto the free list, or
4338 * for this purpose it looks like it
4339 * just came off of the free list
4340 */
4341 vm_page_free_prepare(m1);
4342 }
4343 #if MACH_ASSERT
4344 stolen_pages++;
4345 #endif
4346 }
4347 m1->pageq.next = (queue_entry_t) m;
4348 m1->pageq.prev = NULL;
4349 m = m1;
4350 }
4351 if (locked_object) {
4352 vm_object_unlock(locked_object);
4353 locked_object = VM_OBJECT_NULL;
4354 }
4355
4356 if (abort_run == TRUE) {
4357 if (m != VM_PAGE_NULL) {
4358 vm_page_free_list(m, FALSE);
4359 }
4360 #if MACH_ASSERT
4361 dumped_run++;
4362 #endif
4363 /*
4364 * want the index of the last
4365 * page in this run that was
4366 * successfully 'stolen', so back
4367 * it up 1 for the auto-decrement on use
4368 * and 1 more to bump back over this page
4369 */
4370 page_idx = tmp_start_idx + 2;
4371 if (page_idx >= vm_pages_count) {
4372 if (wrapped)
4373 goto done_scanning;
4374 page_idx = last_idx = 0;
4375 wrapped = TRUE;
4376 }
4377 abort_run = FALSE;
4378
4379 /*
4380 * We had to abort this run; resume the scan
4381 * just past the page that caused the abort and
4382 * remember that index as the next starting point.
4383 */
4384 RESET_STATE_OF_RUN();
4385
4386 if( flags & KMA_LOMEM)
4387 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4388 else
4389 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4390
4391 last_idx = page_idx;
4392
4393 lck_mtx_lock(&vm_page_queue_free_lock);
4394 /*
4395 * reset our free page limit since we
4396 * dropped the lock protecting the vm_page_free_queue
4397 */
4398 free_available = vm_page_free_count - vm_page_free_reserved;
4399 goto retry;
4400 }
4401
4402 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4403
4404 if (wire == TRUE)
4405 m1->wire_count++;
4406 else
4407 m1->gobbled = TRUE;
4408 }
4409 if (wire == FALSE)
4410 vm_page_gobble_count += npages;
4411
4412 /*
4413 * gobbled pages are also counted as wired pages
4414 */
4415 vm_page_wire_count += npages;
4416
4417 assert(vm_page_verify_contiguous(m, npages));
4418 }
4419 done_scanning:
4420 PAGE_REPLACEMENT_ALLOWED(FALSE);
4421
4422 vm_page_unlock_queues();
4423
4424 #if DEBUG
4425 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4426
4427 tv_end_sec -= tv_start_sec;
4428 if (tv_end_usec < tv_start_usec) {
4429 tv_end_sec--;
4430 tv_end_usec += 1000000;
4431 }
4432 tv_end_usec -= tv_start_usec;
4433 if (tv_end_usec >= 1000000) {
4434 tv_end_sec++;
4435 tv_end_usec -= 1000000;
4436 }
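	/*
	 * Worked example of the elapsed-time normalization above (illustrative):
	 * start = 2s 900000us, end = 4s 100000us  =>  sec = 2, then borrow one
	 * second because 100000 < 900000 (sec = 1, usec = 1100000), and finally
	 * usec -= 900000 leaves 1s 200000us elapsed.
	 */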
4437 if (vm_page_find_contig_debug) {
4438 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4439 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4440 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4441 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4442 }
4443
4444 #endif
4445 #if MACH_ASSERT
4446 vm_page_verify_free_lists();
4447 #endif
4448 return m;
4449 }
4450
4451 /*
4452 * Allocate a list of contiguous, wired pages.
4453 */
4454 kern_return_t
4455 cpm_allocate(
4456 vm_size_t size,
4457 vm_page_t *list,
4458 ppnum_t max_pnum,
4459 ppnum_t pnum_mask,
4460 boolean_t wire,
4461 int flags)
4462 {
4463 vm_page_t pages;
4464 unsigned int npages;
4465
4466 if (size % PAGE_SIZE != 0)
4467 return KERN_INVALID_ARGUMENT;
4468
4469 npages = (unsigned int) (size / PAGE_SIZE);
4470 if (npages != size / PAGE_SIZE) {
4471 /* 32-bit overflow */
4472 return KERN_INVALID_ARGUMENT;
4473 }
4474
4475 /*
4476 * Obtain a pointer to a subset of the free
4477 * list large enough to satisfy the request;
4478 * the region will be physically contiguous.
4479 */
4480 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4481
4482 if (pages == VM_PAGE_NULL)
4483 return KERN_NO_SPACE;
4484 /*
4485 * determine need for wakeups
4486 */
4487 if ((vm_page_free_count < vm_page_free_min) ||
4488 ((vm_page_free_count < vm_page_free_target) &&
4489 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4490 thread_wakeup((event_t) &vm_page_free_wanted);
4491
4492 VM_CHECK_MEMORYSTATUS;
4493
4494 /*
4495 * The CPM pages should now be available and
4496 * ordered by ascending physical address.
4497 */
4498 assert(vm_page_verify_contiguous(pages, npages));
4499
4500 *list = pages;
4501 return KERN_SUCCESS;
4502 }
4503
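/*
 * Illustrative caller sketch (not part of the original file): asking for
 * 16 wired, physically contiguous pages whose page numbers all lie below
 * 4GB and whose first page is 64KB-aligned (with 4KB pages, pnum_mask 0xF
 * forces the low four bits of the starting page number to zero).  The
 * function name is hypothetical; a real in-kernel caller such as
 * kmem_alloc_contig() would typically drive this path.
 */
#if 0 /* example only, not compiled */
static kern_return_t
example_cpm_request(vm_page_t *pages)	/* hypothetical */
{
	return cpm_allocate((vm_size_t)16 * PAGE_SIZE,
		pages,
		(ppnum_t)0x000FFFFF,	/* max_pnum: page numbers below 4GB */
		(ppnum_t)0xF,		/* pnum_mask: 64KB-aligned start */
		TRUE,			/* wire the pages */
		0);			/* flags: no KMA_LOMEM */
}
#endif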
4504
4505 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4506
4507 /*
4508 * when working on a 'run' of pages, it is necessary to hold
4509 * the vm_page_queue_lock (a hot global lock) for certain operations
4510 * on the page... however, the majority of the work can be done
4511 * while merely holding the object lock... in fact there are certain
4512 * collections of pages that don't require any work brokered by the
4513 * vm_page_queue_lock... to mitigate the time spent behind the global
4514 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4515 * while doing all of the work that doesn't require the vm_page_queue_lock...
4516 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4517 * necessary work for each page... we will grab the busy bit on the page
4518 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4519 * if it can't immediately take the vm_page_queue_lock in order to compete
4520 * for the locks in the same order that vm_pageout_scan takes them.
4521 * the operation names are modeled after the names of the routines that
4522 * need to be called in order to make the changes very obvious in the
4523 * original loop
4524 */
4525
4526 void
4527 vm_page_do_delayed_work(
4528 vm_object_t object,
4529 struct vm_page_delayed_work *dwp,
4530 int dw_count)
4531 {
4532 int j;
4533 vm_page_t m;
4534 vm_page_t local_free_q = VM_PAGE_NULL;
4535
4536 /*
4537 * pageout_scan takes the vm_page_lock_queues first
4538 * then tries for the object lock... to avoid what
4539 * is effectively a lock inversion, we'll go to the
4540 * trouble of taking them in that same order... otherwise
4541 * if this object contains the majority of the pages resident
4542 * in the UBC (or a small set of large objects actively being
4543 * worked on contain the majority of the pages), we could
4544 * cause the pageout_scan thread to 'starve' in its attempt
4545 * to find pages to move to the free queue, since it has to
4546 * successfully acquire the object lock of any candidate page
4547 * before it can steal/clean it.
4548 */
4549 if (!vm_page_trylockspin_queues()) {
4550 vm_object_unlock(object);
4551
4552 vm_page_lockspin_queues();
4553
4554 for (j = 0; ; j++) {
4555 if (!vm_object_lock_avoid(object) &&
4556 _vm_object_lock_try(object))
4557 break;
4558 vm_page_unlock_queues();
4559 mutex_pause(j);
4560 vm_page_lockspin_queues();
4561 }
4562 }
4563 for (j = 0; j < dw_count; j++, dwp++) {
4564
4565 m = dwp->dw_m;
4566
4567 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4568 vm_pageout_throttle_up(m);
4569
4570 if (dwp->dw_mask & DW_vm_page_wire)
4571 vm_page_wire(m);
4572 else if (dwp->dw_mask & DW_vm_page_unwire) {
4573 boolean_t queueit;
4574
4575 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4576
4577 vm_page_unwire(m, queueit);
4578 }
4579 if (dwp->dw_mask & DW_vm_page_free) {
4580 vm_page_free_prepare_queues(m);
4581
4582 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4583 /*
4584 * Add this page to our list of reclaimed pages,
4585 * to be freed later.
4586 */
4587 m->pageq.next = (queue_entry_t) local_free_q;
4588 local_free_q = m;
4589 } else {
4590 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4591 vm_page_deactivate_internal(m, FALSE);
4592 else if (dwp->dw_mask & DW_vm_page_activate) {
4593 if (m->active == FALSE) {
4594 vm_page_activate(m);
4595 }
4596 }
4597 else if (dwp->dw_mask & DW_vm_page_speculate)
4598 vm_page_speculate(m, TRUE);
4599 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4600 /*
4601 * if we didn't hold the object lock and did this,
4602 * we might disconnect the page, then someone might
4603 * soft fault it back in, then we would put it on the
4604 * cleaned queue, and so we would have a referenced (maybe even dirty)
4605 * page on that queue, which we don't want
4606 */
4607 int refmod_state = pmap_disconnect(m->phys_page);
4608
4609 if ((refmod_state & VM_MEM_REFERENCED)) {
4610 /*
4611 * this page has been touched since it got cleaned; let's activate it
4612 * if it hasn't already been
4613 */
4614 vm_pageout_enqueued_cleaned++;
4615 vm_pageout_cleaned_reactivated++;
4616 vm_pageout_cleaned_commit_reactivated++;
4617
4618 if (m->active == FALSE)
4619 vm_page_activate(m);
4620 } else {
4621 m->reference = FALSE;
4622 vm_page_enqueue_cleaned(m);
4623 }
4624 }
4625 else if (dwp->dw_mask & DW_vm_page_lru)
4626 vm_page_lru(m);
4627 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4628 if ( !m->pageout_queue)
4629 VM_PAGE_QUEUES_REMOVE(m);
4630 }
4631 if (dwp->dw_mask & DW_set_reference)
4632 m->reference = TRUE;
4633 else if (dwp->dw_mask & DW_clear_reference)
4634 m->reference = FALSE;
4635
4636 if (dwp->dw_mask & DW_move_page) {
4637 if ( !m->pageout_queue) {
4638 VM_PAGE_QUEUES_REMOVE(m);
4639
4640 assert(m->object != kernel_object);
4641
4642 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4643 }
4644 }
4645 if (dwp->dw_mask & DW_clear_busy)
4646 m->busy = FALSE;
4647
4648 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4649 PAGE_WAKEUP(m);
4650 }
4651 }
4652 vm_page_unlock_queues();
4653
4654 if (local_free_q)
4655 vm_page_free_list(local_free_q, TRUE);
4656
4657 VM_CHECK_MEMORYSTATUS;
4658
4659 }
4660
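/*
 * Illustrative sketch (not part of the original file) of the two-pass
 * pattern described in the comment above vm_page_do_delayed_work():
 * gather per-page work while holding only the object lock, then broker
 * the queue-lock portion in batches.  The function name and the batch
 * size of 16 are hypothetical (the file's own tunable is
 * vm_max_delayed_work_limit); the pages are assumed to belong to 'object'.
 */
#if 0 /* example only, not compiled */
static void
example_batch_page_work(vm_object_t object, vm_page_t *page, int count)	/* hypothetical */
{
	struct vm_page_delayed_work	dw_array[16];
	int				dw_count = 0;
	int				i;

	vm_object_lock(object);

	for (i = 0; i < count; i++) {
		/* ...work that only needs the object lock on page[i] goes here... */

		dw_array[dw_count].dw_m = page[i];
		dw_array[dw_count].dw_mask = DW_vm_page_activate;

		if (++dw_count == 16) {
			/* queue-lock work for the whole batch is brokered in one call */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif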
4661 kern_return_t
4662 vm_page_alloc_list(
4663 int page_count,
4664 int flags,
4665 vm_page_t *list)
4666 {
4667 vm_page_t lo_page_list = VM_PAGE_NULL;
4668 vm_page_t mem;
4669 int i;
4670
4671 if ( !(flags & KMA_LOMEM))
4672 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4673
4674 for (i = 0; i < page_count; i++) {
4675
4676 mem = vm_page_grablo();
4677
4678 if (mem == VM_PAGE_NULL) {
4679 if (lo_page_list)
4680 vm_page_free_list(lo_page_list, FALSE);
4681
4682 *list = VM_PAGE_NULL;
4683
4684 return (KERN_RESOURCE_SHORTAGE);
4685 }
4686 mem->pageq.next = (queue_entry_t) lo_page_list;
4687 lo_page_list = mem;
4688 }
4689 *list = lo_page_list;
4690
4691 return (KERN_SUCCESS);
4692 }
4693
4694 void
4695 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4696 {
4697 page->offset = offset;
4698 }
4699
4700 vm_page_t
4701 vm_page_get_next(vm_page_t page)
4702 {
4703 return ((vm_page_t) page->pageq.next);
4704 }
4705
4706 vm_object_offset_t
4707 vm_page_get_offset(vm_page_t page)
4708 {
4709 return (page->offset);
4710 }
4711
4712 ppnum_t
4713 vm_page_get_phys_page(vm_page_t page)
4714 {
4715 return (page->phys_page);
4716 }
4717
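/*
 * Illustrative sketch (not part of the original file): allocating a small
 * list of low-memory pages with vm_page_alloc_list() and walking it with
 * the accessors above.  The function name is hypothetical; freeing through
 * vm_page_free_list() mirrors the error path inside vm_page_alloc_list().
 */
#if 0 /* example only, not compiled */
static void
example_walk_lo_pages(void)	/* hypothetical */
{
	vm_page_t	list, p;

	if (vm_page_alloc_list(8, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
		printf("low page: phys 0x%x\n", vm_page_get_phys_page(p));

	vm_page_free_list(list, FALSE);
}
#endif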
4718
4719 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4720
4721 #if HIBERNATION
4722
4723 static vm_page_t hibernate_gobble_queue;
4724
4725 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4726
4727 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4728 static int hibernate_flush_dirty_pages(int);
4729 static int hibernate_flush_queue(queue_head_t *, int);
4730
4731 void hibernate_flush_wait(void);
4732 void hibernate_mark_in_progress(void);
4733 void hibernate_clear_in_progress(void);
4734
4735 void hibernate_free_range(int, int);
4736 void hibernate_hash_insert_page(vm_page_t);
4737 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4738 void hibernate_rebuild_vm_structs(void);
4739 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4740 ppnum_t hibernate_lookup_paddr(unsigned int);
4741
4742 struct hibernate_statistics {
4743 int hibernate_considered;
4744 int hibernate_reentered_on_q;
4745 int hibernate_found_dirty;
4746 int hibernate_skipped_cleaning;
4747 int hibernate_skipped_transient;
4748 int hibernate_skipped_precious;
4749 int hibernate_skipped_external;
4750 int hibernate_queue_nolock;
4751 int hibernate_queue_paused;
4752 int hibernate_throttled;
4753 int hibernate_throttle_timeout;
4754 int hibernate_drained;
4755 int hibernate_drain_timeout;
4756 int cd_lock_failed;
4757 int cd_found_precious;
4758 int cd_found_wired;
4759 int cd_found_busy;
4760 int cd_found_unusual;
4761 int cd_found_cleaning;
4762 int cd_found_laundry;
4763 int cd_found_dirty;
4764 int cd_found_xpmapped;
4765 int cd_local_free;
4766 int cd_total_free;
4767 int cd_vm_page_wire_count;
4768 int cd_vm_struct_pages_unneeded;
4769 int cd_pages;
4770 int cd_discarded;
4771 int cd_count_wire;
4772 } hibernate_stats;
4773
4774
4775
4776 static int
4777 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4778 {
4779 wait_result_t wait_result;
4780
4781 vm_page_lock_queues();
4782
4783 while ( !queue_empty(&q->pgo_pending) ) {
4784
4785 q->pgo_draining = TRUE;
4786
4787 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4788
4789 vm_page_unlock_queues();
4790
4791 wait_result = thread_block(THREAD_CONTINUE_NULL);
4792
4793 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
4794 hibernate_stats.hibernate_drain_timeout++;
4795
4796 if (q == &vm_pageout_queue_external)
4797 return (0);
4798
4799 return (1);
4800 }
4801 vm_page_lock_queues();
4802
4803 hibernate_stats.hibernate_drained++;
4804 }
4805 vm_page_unlock_queues();
4806
4807 return (0);
4808 }
4809
4810
4811 boolean_t hibernate_skip_external = FALSE;
4812
4813 static int
4814 hibernate_flush_queue(queue_head_t *q, int qcount)
4815 {
4816 vm_page_t m;
4817 vm_object_t l_object = NULL;
4818 vm_object_t m_object = NULL;
4819 int refmod_state = 0;
4820 int try_failed_count = 0;
4821 int retval = 0;
4822 int current_run = 0;
4823 struct vm_pageout_queue *iq;
4824 struct vm_pageout_queue *eq;
4825 struct vm_pageout_queue *tq;
4826
4827
4828 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4829
4830 iq = &vm_pageout_queue_internal;
4831 eq = &vm_pageout_queue_external;
4832
4833 vm_page_lock_queues();
4834
4835 while (qcount && !queue_empty(q)) {
4836
4837 if (current_run++ == 1000) {
4838 if (hibernate_should_abort()) {
4839 retval = 1;
4840 break;
4841 }
4842 current_run = 0;
4843 }
4844
4845 m = (vm_page_t) queue_first(q);
4846 m_object = m->object;
4847
4848 /*
4849 * check to see if we currently are working
4850 * with the same object... if so, we've
4851 * already got the lock
4852 */
4853 if (m_object != l_object) {
4854 /*
4855 * the object associated with candidate page is
4856 * different from the one we were just working
4857 * with... dump the lock if we still own it
4858 */
4859 if (l_object != NULL) {
4860 vm_object_unlock(l_object);
4861 l_object = NULL;
4862 }
4863 /*
4864 * Try to lock object; since we've already got the
4865 * page queues lock, we can only 'try' for this one.
4866 * if the 'try' fails, we need to do a mutex_pause
4867 * to allow the owner of the object lock a chance to
4868 * run...
4869 */
4870 if ( !vm_object_lock_try_scan(m_object)) {
4871
4872 if (try_failed_count > 20) {
4873 hibernate_stats.hibernate_queue_nolock++;
4874
4875 goto reenter_pg_on_q;
4876 }
4877 vm_pageout_scan_wants_object = m_object;
4878
4879 vm_page_unlock_queues();
4880 mutex_pause(try_failed_count++);
4881 vm_page_lock_queues();
4882
4883 hibernate_stats.hibernate_queue_paused++;
4884 continue;
4885 } else {
4886 l_object = m_object;
4887 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4888 }
4889 }
4890 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4891 /*
4892 * page is not to be cleaned
4893 * put it back on the head of its queue
4894 */
4895 if (m->cleaning)
4896 hibernate_stats.hibernate_skipped_cleaning++;
4897 else
4898 hibernate_stats.hibernate_skipped_transient++;
4899
4900 goto reenter_pg_on_q;
4901 }
4902 if (m_object->copy == VM_OBJECT_NULL) {
4903 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4904 /*
4905 * let the normal hibernate image path
4906 * deal with these
4907 */
4908 goto reenter_pg_on_q;
4909 }
4910 }
4911 if ( !m->dirty && m->pmapped) {
4912 refmod_state = pmap_get_refmod(m->phys_page);
4913
4914 if ((refmod_state & VM_MEM_MODIFIED)) {
4915 SET_PAGE_DIRTY(m, FALSE);
4916 }
4917 } else
4918 refmod_state = 0;
4919
4920 if ( !m->dirty) {
4921 /*
4922 * page is not to be cleaned
4923 * put it back on the head of its queue
4924 */
4925 if (m->precious)
4926 hibernate_stats.hibernate_skipped_precious++;
4927
4928 goto reenter_pg_on_q;
4929 }
4930
4931 if (hibernate_skip_external == TRUE && !m_object->internal) {
4932
4933 hibernate_stats.hibernate_skipped_external++;
4934
4935 goto reenter_pg_on_q;
4936 }
4937 tq = NULL;
4938
4939 if (m_object->internal) {
4940 if (VM_PAGE_Q_THROTTLED(iq))
4941 tq = iq;
4942 } else if (VM_PAGE_Q_THROTTLED(eq))
4943 tq = eq;
4944
4945 if (tq != NULL) {
4946 wait_result_t wait_result;
4947 int wait_count = 5;
4948
4949 if (l_object != NULL) {
4950 vm_object_unlock(l_object);
4951 l_object = NULL;
4952 }
4953 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4954
4955 while (retval == 0) {
4956
4957 tq->pgo_throttled = TRUE;
4958
4959 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4960
4961 vm_page_unlock_queues();
4962
4963 wait_result = thread_block(THREAD_CONTINUE_NULL);
4964
4965 vm_page_lock_queues();
4966
4967 if (wait_result != THREAD_TIMED_OUT)
4968 break;
4969 if (!VM_PAGE_Q_THROTTLED(tq))
4970 break;
4971
4972 if (hibernate_should_abort())
4973 retval = 1;
4974
4975 if (--wait_count == 0) {
4976
4977 hibernate_stats.hibernate_throttle_timeout++;
4978
4979 if (tq == eq) {
4980 hibernate_skip_external = TRUE;
4981 break;
4982 }
4983 retval = 1;
4984 }
4985 }
4986 if (retval)
4987 break;
4988
4989 hibernate_stats.hibernate_throttled++;
4990
4991 continue;
4992 }
4993 /*
4994 * we've already factored out pages in the laundry which
4995 * means this page can't be on the pageout queue so it's
4996 * safe to do the VM_PAGE_QUEUES_REMOVE
4997 */
4998 assert(!m->pageout_queue);
4999
5000 VM_PAGE_QUEUES_REMOVE(m);
5001
5002 if (COMPRESSED_PAGER_IS_ACTIVE)
5003 pmap_disconnect(m->phys_page);
5004
5005 vm_pageout_cluster(m, FALSE);
5006
5007 hibernate_stats.hibernate_found_dirty++;
5008
5009 goto next_pg;
5010
5011 reenter_pg_on_q:
5012 queue_remove(q, m, vm_page_t, pageq);
5013 queue_enter(q, m, vm_page_t, pageq);
5014
5015 hibernate_stats.hibernate_reentered_on_q++;
5016 next_pg:
5017 hibernate_stats.hibernate_considered++;
5018
5019 qcount--;
5020 try_failed_count = 0;
5021 }
5022 if (l_object != NULL) {
5023 vm_object_unlock(l_object);
5024 l_object = NULL;
5025 }
5026 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5027
5028 vm_page_unlock_queues();
5029
5030 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5031
5032 return (retval);
5033 }
5034
5035
5036 static int
5037 hibernate_flush_dirty_pages(int pass)
5038 {
5039 struct vm_speculative_age_q *aq;
5040 uint32_t i;
5041
5042 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5043
5044 if (vm_page_local_q) {
5045 for (i = 0; i < vm_page_local_q_count; i++)
5046 vm_page_reactivate_local(i, TRUE, FALSE);
5047 }
5048
5049 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5050 int qcount;
5051 vm_page_t m;
5052
5053 aq = &vm_page_queue_speculative[i];
5054
5055 if (queue_empty(&aq->age_q))
5056 continue;
5057 qcount = 0;
5058
5059 vm_page_lockspin_queues();
5060
5061 queue_iterate(&aq->age_q,
5062 m,
5063 vm_page_t,
5064 pageq)
5065 {
5066 qcount++;
5067 }
5068 vm_page_unlock_queues();
5069
5070 if (qcount) {
5071 if (hibernate_flush_queue(&aq->age_q, qcount))
5072 return (1);
5073 }
5074 }
5075 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5076 return (1);
5077 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5078 return (1);
5079 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5080 return (1);
5081 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5082 return (1);
5083
5084 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5085 vm_compressor_record_warmup_start();
5086
5087 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5088 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5089 vm_compressor_record_warmup_end();
5090 return (1);
5091 }
5092 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5093 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5094 vm_compressor_record_warmup_end();
5095 return (1);
5096 }
5097 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5098 vm_compressor_record_warmup_end();
5099
5100 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5101 return (1);
5102
5103 return (0);
5104 }
5105
5106
5107 int
5108 hibernate_flush_memory()
5109 {
5110 int retval;
5111
5112 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5113
5114 hibernate_cleaning_in_progress = TRUE;
5115 hibernate_skip_external = FALSE;
5116
5117 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5118
5119 if (COMPRESSED_PAGER_IS_ACTIVE) {
5120
5121 if ((retval = hibernate_flush_dirty_pages(2)) == 0) {
5122
5123 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5124
5125 vm_compressor_flush();
5126
5127 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5128 }
5129 }
5130 if (retval == 0 && consider_buffer_cache_collect != NULL) {
5131 unsigned int orig_wire_count;
5132
5133 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5134 orig_wire_count = vm_page_wire_count;
5135
5136 (void)(*consider_buffer_cache_collect)(1);
5137 consider_zone_gc(TRUE);
5138
5139 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5140
5141 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5142 }
5143 }
5144 hibernate_cleaning_in_progress = FALSE;
5145
5146 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5147
5148 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5149 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5150
5151
5152 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5153 hibernate_stats.hibernate_considered,
5154 hibernate_stats.hibernate_reentered_on_q,
5155 hibernate_stats.hibernate_found_dirty);
5156 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5157 hibernate_stats.hibernate_skipped_cleaning,
5158 hibernate_stats.hibernate_skipped_transient,
5159 hibernate_stats.hibernate_skipped_precious,
5160 hibernate_stats.hibernate_skipped_external,
5161 hibernate_stats.hibernate_queue_nolock);
5162 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5163 hibernate_stats.hibernate_queue_paused,
5164 hibernate_stats.hibernate_throttled,
5165 hibernate_stats.hibernate_throttle_timeout,
5166 hibernate_stats.hibernate_drained,
5167 hibernate_stats.hibernate_drain_timeout);
5168
5169 return (retval);
5170 }
5171
5172
5173 static void
5174 hibernate_page_list_zero(hibernate_page_list_t *list)
5175 {
5176 uint32_t bank;
5177 hibernate_bitmap_t * bitmap;
5178
5179 bitmap = &list->bank_bitmap[0];
5180 for (bank = 0; bank < list->bank_count; bank++)
5181 {
5182 uint32_t last_bit;
5183
5184 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5185 // set out-of-bound bits at end of bitmap.
5186 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5187 if (last_bit)
5188 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
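	/*
	 * e.g. a bank spanning 40 pages uses two 32-bit bitmap words;
	 * last_bit = 40 & 31 = 8, so the 24 bit positions in the final
	 * word that correspond to no real page are pre-set and will
	 * never be treated as pages that need saving.
	 */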
5189
5190 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5191 }
5192 }
5193
5194 void
5195 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5196 {
5197 uint32_t i;
5198 vm_page_t m;
5199 uint64_t start, end, timeout, nsec;
5200 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5201 clock_get_uptime(&start);
5202
5203 for (i = 0; i < gobble_count; i++)
5204 {
5205 while (VM_PAGE_NULL == (m = vm_page_grab()))
5206 {
5207 clock_get_uptime(&end);
5208 if (end >= timeout)
5209 break;
5210 VM_PAGE_WAIT();
5211 }
5212 if (!m)
5213 break;
5214 m->busy = FALSE;
5215 vm_page_gobble(m);
5216
5217 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5218 hibernate_gobble_queue = m;
5219 }
5220
5221 clock_get_uptime(&end);
5222 absolutetime_to_nanoseconds(end - start, &nsec);
5223 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5224 }
5225
5226 void
5227 hibernate_free_gobble_pages(void)
5228 {
5229 vm_page_t m, next;
5230 uint32_t count = 0;
5231
5232 m = (vm_page_t) hibernate_gobble_queue;
5233 while(m)
5234 {
5235 next = (vm_page_t) m->pageq.next;
5236 vm_page_free(m);
5237 count++;
5238 m = next;
5239 }
5240 hibernate_gobble_queue = VM_PAGE_NULL;
5241
5242 if (count)
5243 HIBLOG("Freed %d pages\n", count);
5244 }
5245
5246 static boolean_t
5247 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5248 {
5249 vm_object_t object = NULL;
5250 int refmod_state;
5251 boolean_t discard = FALSE;
5252
5253 do
5254 {
5255 if (m->private)
5256 panic("hibernate_consider_discard: private");
5257
5258 if (!vm_object_lock_try(m->object)) {
5259 if (!preflight) hibernate_stats.cd_lock_failed++;
5260 break;
5261 }
5262 object = m->object;
5263
5264 if (VM_PAGE_WIRED(m)) {
5265 if (!preflight) hibernate_stats.cd_found_wired++;
5266 break;
5267 }
5268 if (m->precious) {
5269 if (!preflight) hibernate_stats.cd_found_precious++;
5270 break;
5271 }
5272 if (m->busy || !object->alive) {
5273 /*
5274 * Somebody is playing with this page.
5275 */
5276 if (!preflight) hibernate_stats.cd_found_busy++;
5277 break;
5278 }
5279 if (m->absent || m->unusual || m->error) {
5280 /*
5281 * If it's unusual in any way, ignore it
5282 */
5283 if (!preflight) hibernate_stats.cd_found_unusual++;
5284 break;
5285 }
5286 if (m->cleaning) {
5287 if (!preflight) hibernate_stats.cd_found_cleaning++;
5288 break;
5289 }
5290 if (m->laundry) {
5291 if (!preflight) hibernate_stats.cd_found_laundry++;
5292 break;
5293 }
5294 if (!m->dirty)
5295 {
5296 refmod_state = pmap_get_refmod(m->phys_page);
5297
5298 if (refmod_state & VM_MEM_REFERENCED)
5299 m->reference = TRUE;
5300 if (refmod_state & VM_MEM_MODIFIED) {
5301 SET_PAGE_DIRTY(m, FALSE);
5302 }
5303 }
5304
5305 /*
5306 * If it's clean or purgeable we can discard the page on wakeup.
5307 */
5308 discard = (!m->dirty)
5309 || (VM_PURGABLE_VOLATILE == object->purgable)
5310 || (VM_PURGABLE_EMPTY == object->purgable);
5311
5312
5313 if (discard == FALSE) {
5314 if (!preflight)
5315 hibernate_stats.cd_found_dirty++;
5316 } else if (m->xpmapped && m->reference) {
5317 if (!preflight)
5318 hibernate_stats.cd_found_xpmapped++;
5319 discard = FALSE;
5320 }
5321 }
5322 while (FALSE);
5323
5324 if (object)
5325 vm_object_unlock(object);
5326
5327 return (discard);
5328 }
5329
5330
5331 static void
5332 hibernate_discard_page(vm_page_t m)
5333 {
5334 if (m->absent || m->unusual || m->error)
5335 /*
5336 * If it's unusual in any way, ignore
5337 */
5338 return;
5339
5340 #if DEBUG
5341 vm_object_t object = m->object;
5342 if (!vm_object_lock_try(m->object))
5343 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5344 #else
5345 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5346 makes sure these locks are uncontended before sleep */
5347 #endif /* !DEBUG */
5348
5349 if (m->pmapped == TRUE)
5350 {
5351 __unused int refmod_state = pmap_disconnect(m->phys_page);
5352 }
5353
5354 if (m->laundry)
5355 panic("hibernate_discard_page(%p) laundry", m);
5356 if (m->private)
5357 panic("hibernate_discard_page(%p) private", m);
5358 if (m->fictitious)
5359 panic("hibernate_discard_page(%p) fictitious", m);
5360
5361 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5362 {
5363 /* object should be on a queue */
5364 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5365 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5366 assert(old_queue);
5367 if (m->object->purgeable_when_ripe) {
5368 vm_purgeable_token_delete_first(old_queue);
5369 }
5370 m->object->purgable = VM_PURGABLE_EMPTY;
5371 }
5372
5373 vm_page_free(m);
5374
5375 #if DEBUG
5376 vm_object_unlock(object);
5377 #endif /* DEBUG */
5378 }
5379
5380 /*
5381 Grab locks for hibernate_page_list_setall()
5382 */
5383 void
5384 hibernate_vm_lock_queues(void)
5385 {
5386 vm_object_lock(compressor_object);
5387 vm_page_lock_queues();
5388 lck_mtx_lock(&vm_page_queue_free_lock);
5389
5390 if (vm_page_local_q) {
5391 uint32_t i;
5392 for (i = 0; i < vm_page_local_q_count; i++) {
5393 struct vpl *lq;
5394 lq = &vm_page_local_q[i].vpl_un.vpl;
5395 VPL_LOCK(&lq->vpl_lock);
5396 }
5397 }
5398 }
5399
5400 void
5401 hibernate_vm_unlock_queues(void)
5402 {
5403 if (vm_page_local_q) {
5404 uint32_t i;
5405 for (i = 0; i < vm_page_local_q_count; i++) {
5406 struct vpl *lq;
5407 lq = &vm_page_local_q[i].vpl_un.vpl;
5408 VPL_UNLOCK(&lq->vpl_lock);
5409 }
5410 }
5411 lck_mtx_unlock(&vm_page_queue_free_lock);
5412 vm_page_unlock_queues();
5413 vm_object_unlock(compressor_object);
5414 }
5415
5416 /*
5417 A zero bit in the bitmaps means the page needs to be saved.  All pages default to being saved;
5418 pages known to the VM not to need saving are then subtracted (their bits are set).
5419 Wired pages that must be saved are tracked in page_list_wired, pageable ones in page_list.
5420 */
5421
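/*
 * Note: throughout the routine below, hibernate_page_bitset(list, TRUE, pnum)
 * sets the page's bit, i.e. marks that page as NOT needing to be preserved in
 * that list.  Free pages are therefore set in both lists, while a resident
 * pageable page that must be saved keeps its zero bit in page_list.
 */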
5422 void
5423 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5424 hibernate_page_list_t * page_list_wired,
5425 hibernate_page_list_t * page_list_pal,
5426 boolean_t preflight,
5427 boolean_t will_discard,
5428 uint32_t * pagesOut)
5429 {
5430 uint64_t start, end, nsec;
5431 vm_page_t m;
5432 vm_page_t next;
5433 uint32_t pages = page_list->page_count;
5434 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5435 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5436 uint32_t count_wire = pages;
5437 uint32_t count_discard_active = 0;
5438 uint32_t count_discard_inactive = 0;
5439 uint32_t count_discard_cleaned = 0;
5440 uint32_t count_discard_purgeable = 0;
5441 uint32_t count_discard_speculative = 0;
5442 uint32_t count_discard_vm_struct_pages = 0;
5443 uint32_t i;
5444 uint32_t bank;
5445 hibernate_bitmap_t * bitmap;
5446 hibernate_bitmap_t * bitmap_wired;
5447 boolean_t discard_all;
5448 boolean_t discard;
5449
5450 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5451
5452 if (preflight) {
5453 page_list = NULL;
5454 page_list_wired = NULL;
5455 page_list_pal = NULL;
5456 discard_all = FALSE;
5457 } else {
5458 discard_all = will_discard;
5459 }
5460
5461 #if DEBUG
5462 if (!preflight)
5463 {
5464 vm_page_lock_queues();
5465 if (vm_page_local_q) {
5466 for (i = 0; i < vm_page_local_q_count; i++) {
5467 struct vpl *lq;
5468 lq = &vm_page_local_q[i].vpl_un.vpl;
5469 VPL_LOCK(&lq->vpl_lock);
5470 }
5471 }
5472 }
5473 #endif /* DEBUG */
5474
5475
5476 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5477
5478 clock_get_uptime(&start);
5479
5480 if (!preflight) {
5481 hibernate_page_list_zero(page_list);
5482 hibernate_page_list_zero(page_list_wired);
5483 hibernate_page_list_zero(page_list_pal);
5484
5485 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5486 hibernate_stats.cd_pages = pages;
5487 }
5488
5489 if (vm_page_local_q) {
5490 for (i = 0; i < vm_page_local_q_count; i++)
5491 vm_page_reactivate_local(i, TRUE, !preflight);
5492 }
5493
5494 if (preflight) {
5495 vm_object_lock(compressor_object);
5496 vm_page_lock_queues();
5497 lck_mtx_lock(&vm_page_queue_free_lock);
5498 }
5499
5500 m = (vm_page_t) hibernate_gobble_queue;
5501 while (m)
5502 {
5503 pages--;
5504 count_wire--;
5505 if (!preflight) {
5506 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5507 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5508 }
5509 m = (vm_page_t) m->pageq.next;
5510 }
5511
5512 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5513 {
5514 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5515 {
5516 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5517 {
5518 pages--;
5519 count_wire--;
5520 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5521 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5522
5523 hibernate_stats.cd_local_free++;
5524 hibernate_stats.cd_total_free++;
5525 }
5526 }
5527 }
5528
5529 for( i = 0; i < vm_colors; i++ )
5530 {
5531 queue_iterate(&vm_page_queue_free[i],
5532 m,
5533 vm_page_t,
5534 pageq)
5535 {
5536 pages--;
5537 count_wire--;
5538 if (!preflight) {
5539 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5540 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5541
5542 hibernate_stats.cd_total_free++;
5543 }
5544 }
5545 }
5546
5547 queue_iterate(&vm_lopage_queue_free,
5548 m,
5549 vm_page_t,
5550 pageq)
5551 {
5552 pages--;
5553 count_wire--;
5554 if (!preflight) {
5555 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5556 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5557
5558 hibernate_stats.cd_total_free++;
5559 }
5560 }
5561
5562 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5563 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5564 {
5565 next = (vm_page_t) m->pageq.next;
5566 discard = FALSE;
5567 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5568 && hibernate_consider_discard(m, preflight))
5569 {
5570 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5571 count_discard_inactive++;
5572 discard = discard_all;
5573 }
5574 else
5575 count_throttled++;
5576 count_wire--;
5577 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5578
5579 if (discard) hibernate_discard_page(m);
5580 m = next;
5581 }
5582
5583 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5584 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5585 {
5586 next = (vm_page_t) m->pageq.next;
5587 discard = FALSE;
5588 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5589 && hibernate_consider_discard(m, preflight))
5590 {
5591 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5592 if (m->dirty)
5593 count_discard_purgeable++;
5594 else
5595 count_discard_inactive++;
5596 discard = discard_all;
5597 }
5598 else
5599 count_anonymous++;
5600 count_wire--;
5601 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5602 if (discard) hibernate_discard_page(m);
5603 m = next;
5604 }
5605
5606 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5607 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5608 {
5609 next = (vm_page_t) m->pageq.next;
5610 discard = FALSE;
5611 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5612 && hibernate_consider_discard(m, preflight))
5613 {
5614 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5615 if (m->dirty)
5616 count_discard_purgeable++;
5617 else
5618 count_discard_inactive++;
5619 discard = discard_all;
5620 }
5621 else
5622 count_inactive++;
5623 count_wire--;
5624 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5625 if (discard) hibernate_discard_page(m);
5626 m = next;
5627 }
5628
5629 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5630 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5631 {
5632 next = (vm_page_t) m->pageq.next;
5633 discard = FALSE;
5634 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5635 && hibernate_consider_discard(m, preflight))
5636 {
5637 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5638 if (m->dirty)
5639 count_discard_purgeable++;
5640 else
5641 count_discard_cleaned++;
5642 discard = discard_all;
5643 }
5644 else
5645 count_cleaned++;
5646 count_wire--;
5647 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5648 if (discard) hibernate_discard_page(m);
5649 m = next;
5650 }
5651
5652 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5653 {
5654 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5655 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5656 {
5657 next = (vm_page_t) m->pageq.next;
5658 discard = FALSE;
5659 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5660 && hibernate_consider_discard(m, preflight))
5661 {
5662 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5663 count_discard_speculative++;
5664 discard = discard_all;
5665 }
5666 else
5667 count_speculative++;
5668 count_wire--;
5669 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5670 if (discard) hibernate_discard_page(m);
5671 m = next;
5672 }
5673 }
5674
5675 m = (vm_page_t) queue_first(&vm_page_queue_active);
5676 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5677 {
5678 next = (vm_page_t) m->pageq.next;
5679 discard = FALSE;
5680 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5681 && hibernate_consider_discard(m, preflight))
5682 {
5683 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5684 if (m->dirty)
5685 count_discard_purgeable++;
5686 else
5687 count_discard_active++;
5688 discard = discard_all;
5689 }
5690 else
5691 count_active++;
5692 count_wire--;
5693 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5694 if (discard) hibernate_discard_page(m);
5695 m = next;
5696 }
5697
5698 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5699 {
5700 count_compressor++;
5701 count_wire--;
5702 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5703 }
5704
5705 if (preflight == FALSE && discard_all == TRUE) {
5706 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5707
5708 HIBLOG("hibernate_teardown started\n");
5709 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5710 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5711
5712 pages -= count_discard_vm_struct_pages;
5713 count_wire -= count_discard_vm_struct_pages;
5714
5715 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5716
5717 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5718 }
5719
5720 if (!preflight) {
5721 // pull wired from hibernate_bitmap
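		/*
		 * A page whose bit is zero (i.e. "must be saved") in
		 * page_list_wired has its bit set here in page_list, so
		 * page_list ends up describing only the pageable pages
		 * to be written and page_list_wired only the wired ones.
		 */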
5722 bitmap = &page_list->bank_bitmap[0];
5723 bitmap_wired = &page_list_wired->bank_bitmap[0];
5724 for (bank = 0; bank < page_list->bank_count; bank++)
5725 {
5726 for (i = 0; i < bitmap->bitmapwords; i++)
5727 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5728 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5729 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5730 }
5731 }
5732
5733 // machine dependent adjustments
5734 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5735
5736 if (!preflight) {
5737 hibernate_stats.cd_count_wire = count_wire;
5738 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
5739 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
5740 }
5741
5742 clock_get_uptime(&end);
5743 absolutetime_to_nanoseconds(end - start, &nsec);
5744 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5745
5746 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5747 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
5748 discard_all ? "did" : "could",
5749 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5750
5751 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5752
5753 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
5754
5755 #if DEBUG
5756 if (!preflight)
5757 {
5758 if (vm_page_local_q) {
5759 for (i = 0; i < vm_page_local_q_count; i++) {
5760 struct vpl *lq;
5761 lq = &vm_page_local_q[i].vpl_un.vpl;
5762 VPL_UNLOCK(&lq->vpl_lock);
5763 }
5764 }
5765 vm_page_unlock_queues();
5766 }
5767 #endif /* DEBUG */
5768
5769 if (preflight) {
5770 lck_mtx_unlock(&vm_page_queue_free_lock);
5771 vm_page_unlock_queues();
5772 vm_object_unlock(compressor_object);
5773 }
5774
5775 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5776 }
5777
5778 void
5779 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5780 {
5781 uint64_t start, end, nsec;
5782 vm_page_t m;
5783 vm_page_t next;
5784 uint32_t i;
5785 uint32_t count_discard_active = 0;
5786 uint32_t count_discard_inactive = 0;
5787 uint32_t count_discard_purgeable = 0;
5788 uint32_t count_discard_cleaned = 0;
5789 uint32_t count_discard_speculative = 0;
5790
5791
5792 #if DEBUG
5793 vm_page_lock_queues();
5794 if (vm_page_local_q) {
5795 for (i = 0; i < vm_page_local_q_count; i++) {
5796 struct vpl *lq;
5797 lq = &vm_page_local_q[i].vpl_un.vpl;
5798 VPL_LOCK(&lq->vpl_lock);
5799 }
5800 }
5801 #endif /* DEBUG */
5802
5803 clock_get_uptime(&start);
5804
5805 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5806 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5807 {
5808 next = (vm_page_t) m->pageq.next;
5809 if (hibernate_page_bittst(page_list, m->phys_page))
5810 {
5811 if (m->dirty)
5812 count_discard_purgeable++;
5813 else
5814 count_discard_inactive++;
5815 hibernate_discard_page(m);
5816 }
5817 m = next;
5818 }
5819
5820 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5821 {
5822 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5823 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5824 {
5825 next = (vm_page_t) m->pageq.next;
5826 if (hibernate_page_bittst(page_list, m->phys_page))
5827 {
5828 count_discard_speculative++;
5829 hibernate_discard_page(m);
5830 }
5831 m = next;
5832 }
5833 }
5834
5835 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5836 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5837 {
5838 next = (vm_page_t) m->pageq.next;
5839 if (hibernate_page_bittst(page_list, m->phys_page))
5840 {
5841 if (m->dirty)
5842 count_discard_purgeable++;
5843 else
5844 count_discard_inactive++;
5845 hibernate_discard_page(m);
5846 }
5847 m = next;
5848 }
5849
5850 m = (vm_page_t) queue_first(&vm_page_queue_active);
5851 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5852 {
5853 next = (vm_page_t) m->pageq.next;
5854 if (hibernate_page_bittst(page_list, m->phys_page))
5855 {
5856 if (m->dirty)
5857 count_discard_purgeable++;
5858 else
5859 count_discard_active++;
5860 hibernate_discard_page(m);
5861 }
5862 m = next;
5863 }
5864
5865 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5866 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5867 {
5868 next = (vm_page_t) m->pageq.next;
5869 if (hibernate_page_bittst(page_list, m->phys_page))
5870 {
5871 if (m->dirty)
5872 count_discard_purgeable++;
5873 else
5874 count_discard_cleaned++;
5875 hibernate_discard_page(m);
5876 }
5877 m = next;
5878 }
5879
5880 #if DEBUG
5881 if (vm_page_local_q) {
5882 for (i = 0; i < vm_page_local_q_count; i++) {
5883 struct vpl *lq;
5884 lq = &vm_page_local_q[i].vpl_un.vpl;
5885 VPL_UNLOCK(&lq->vpl_lock);
5886 }
5887 }
5888 vm_page_unlock_queues();
5889 #endif /* DEBUG */
5890
5891 clock_get_uptime(&end);
5892 absolutetime_to_nanoseconds(end - start, &nsec);
5893 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5894 nsec / 1000000ULL,
5895 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5896 }
5897
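/*
 * State shared by the hibernation teardown/rebuild path: whether a rebuild
 * is still pending, the highest valid index in the compacted vm_pages array,
 * a list of hash-tabled pages that live outside vm_pages[], and counters
 * used for sanity checks and logging.
 */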
5898 boolean_t hibernate_paddr_map_inited = FALSE;
5899 boolean_t hibernate_rebuild_needed = FALSE;
5900 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
5901 vm_page_t hibernate_rebuild_hash_list = NULL;
5902
5903 unsigned int hibernate_teardown_found_tabled_pages = 0;
5904 unsigned int hibernate_teardown_found_created_pages = 0;
5905 unsigned int hibernate_teardown_found_free_pages = 0;
5906 unsigned int hibernate_teardown_vm_page_free_count;
5907
5908
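/*
 * A ppnum_mapping describes a run of vm_pages[] entries
 * [ppnm_sindx, ppnm_eindx) whose physical pages are contiguous, starting
 * at ppnm_base_paddr.  The runs are chained through ppnm_next and let
 * hibernate_lookup_paddr() recover a page's physical page number from its
 * index into vm_pages[].
 */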
5909 struct ppnum_mapping {
5910 struct ppnum_mapping *ppnm_next;
5911 ppnum_t ppnm_base_paddr;
5912 unsigned int ppnm_sindx;
5913 unsigned int ppnm_eindx;
5914 };
5915
5916 struct ppnum_mapping *ppnm_head;
5917 struct ppnum_mapping *ppnm_last_found = NULL;
5918
5919
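/*
 *	Routine:	hibernate_create_paddr_map
 *	Purpose:
 *		One-time scan of vm_pages[] that records each run of physically
 *		contiguous pages as a ppnum_mapping, building the list consumed
 *		by hibernate_lookup_paddr().
 */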
5920 void
5921 hibernate_create_paddr_map()
5922 {
5923 unsigned int i;
5924 ppnum_t next_ppnum_in_run = 0;
5925 struct ppnum_mapping *ppnm = NULL;
5926
5927 if (hibernate_paddr_map_inited == FALSE) {
5928
5929 for (i = 0; i < vm_pages_count; i++) {
5930
5931 if (ppnm)
5932 ppnm->ppnm_eindx = i;
5933
5934 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
5935
5936 ppnm = kalloc(sizeof(struct ppnum_mapping));
5937
5938 ppnm->ppnm_next = ppnm_head;
5939 ppnm_head = ppnm;
5940
5941 ppnm->ppnm_sindx = i;
5942 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
5943 }
5944 next_ppnum_in_run = vm_pages[i].phys_page + 1;
5945 }
5946 ppnm->ppnm_eindx++;
5947
5948 hibernate_paddr_map_inited = TRUE;
5949 }
5950 }
5951
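/*
 *	Routine:	hibernate_lookup_paddr
 *	Purpose:
 *		Translate an index into vm_pages[] back to the physical page
 *		number of that entry, using the ppnum_mapping runs.  The last
 *		matching run is cached in ppnm_last_found, since callers tend
 *		to walk indices sequentially.  For example (illustrative
 *		numbers only): in a run with ppnm_sindx == 100 and
 *		ppnm_base_paddr == 0x2000, index 103 yields ppnum 0x2003.
 *		Panics if the index is not covered by any run.
 */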
5952 ppnum_t
5953 hibernate_lookup_paddr(unsigned int indx)
5954 {
5955 struct ppnum_mapping *ppnm = NULL;
5956
5957 ppnm = ppnm_last_found;
5958
5959 if (ppnm) {
5960 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
5961 goto done;
5962 }
5963 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
5964
5965 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
5966 ppnm_last_found = ppnm;
5967 break;
5968 }
5969 }
5970 if (ppnm == NULL)
5971 panic("hibernate_lookup_paddr of %d failed\n", indx);
5972 done:
5973 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
5974 }
5975
5976
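/*
 *	Routine:	hibernate_mark_as_unneeded
 *	Purpose:
 *		Set the bits for the physical pages backing the kernel virtual
 *		range [saddr, eaddr) in both page lists, marking them as
 *		unneeded for the image.  saddr is rounded up and eaddr rounded
 *		down to page boundaries, so only pages entirely inside the
 *		range are marked.  Returns the number of pages marked.
 */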
5977 uint32_t
5978 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
5979 {
5980 addr64_t saddr_aligned;
5981 addr64_t eaddr_aligned;
5982 addr64_t addr;
5983 ppnum_t paddr;
5984 unsigned int mark_as_unneeded_pages = 0;
5985
5986 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
5987 eaddr_aligned = eaddr & ~PAGE_MASK_64;
5988
5989 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
5990
5991 paddr = pmap_find_phys(kernel_pmap, addr);
5992
5993 assert(paddr);
5994
5995 hibernate_page_bitset(page_list, TRUE, paddr);
5996 hibernate_page_bitset(page_list_wired, TRUE, paddr);
5997
5998 mark_as_unneeded_pages++;
5999 }
6000 return (mark_as_unneeded_pages);
6001 }
6002
6003
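/*
 *	Routine:	hibernate_hash_insert_page
 *	Purpose:
 *		Re-link a tabled page at the head of its vm_object/offset hash
 *		bucket.  Unlike vm_page_insert(), no bucket lock is taken here,
 *		which presumes the rebuild path runs before normal paging
 *		activity resumes.
 */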
6004 void
6005 hibernate_hash_insert_page(vm_page_t mem)
6006 {
6007 vm_page_bucket_t *bucket;
6008 int hash_id;
6009
6010 assert(mem->tabled);
6011 assert(mem->object);
6012 assert(mem->offset != (vm_object_offset_t) -1);
6013
6014 /*
6015 * Insert it into the object/offset hash table
6016 */
6017 hash_id = vm_page_hash(mem->object, mem->offset);
6018 bucket = &vm_page_buckets[hash_id];
6019
6020 mem->next = bucket->pages;
6021 bucket->pages = mem;
6022 }
6023
6024
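/*
 *	Routine:	hibernate_free_range
 *	Purpose:
 *		Reinitialize vm_pages[sindx..eindx) as free pages: each entry
 *		is re-initialized with its physical page number (recovered via
 *		hibernate_lookup_paddr()), placed at the head of the free queue
 *		for its color, and counted in vm_page_free_count.
 */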
6025 void
6026 hibernate_free_range(int sindx, int eindx)
6027 {
6028 vm_page_t mem;
6029 unsigned int color;
6030
6031 while (sindx < eindx) {
6032 mem = &vm_pages[sindx];
6033
6034 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6035
6036 mem->lopage = FALSE;
6037 mem->free = TRUE;
6038
6039 color = mem->phys_page & vm_color_mask;
6040 queue_enter_first(&vm_page_queue_free[color],
6041 mem,
6042 vm_page_t,
6043 pageq);
6044 vm_page_free_count++;
6045
6046 sindx++;
6047 }
6048 }
6049
6050
6051 extern void hibernate_rebuild_pmap_structs(void);
6052
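/*
 *	Routine:	hibernate_rebuild_vm_structs
 *	Purpose:
 *		Undo hibernate_teardown_vm_structs() after wake.  Rebuilds the
 *		pmap structures, then walks the compacted vm_pages[] entries
 *		from the highest valid index down, moves each displaced
 *		vm_page_t back to the original slot recorded in its "next"
 *		field, re-hashes tabled pages, and turns the holes in between
 *		back into ranges of free pages.  Finally the fictitious pages
 *		saved on hibernate_rebuild_hash_list are re-inserted into the
 *		hash.
 */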
6053 void
6054 hibernate_rebuild_vm_structs(void)
6055 {
6056 int cindx, sindx, eindx;
6057 vm_page_t mem, tmem, mem_next;
6058 AbsoluteTime startTime, endTime;
6059 uint64_t nsec;
6060
6061 if (hibernate_rebuild_needed == FALSE)
6062 return;
6063
6064 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6065 HIBLOG("hibernate_rebuild started\n");
6066
6067 clock_get_uptime(&startTime);
6068
6069 hibernate_rebuild_pmap_structs();
6070
6071 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6072 eindx = vm_pages_count;
6073
6074 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6075
6076 mem = &vm_pages[cindx];
6077 /*
6078 * hibernate_teardown_vm_structs left a pointer to this
6079 * vm_page_t's original slot in "next".
6080 */
6081 tmem = mem->next;
6082 mem->next = NULL;
6083
6084 sindx = (int)(tmem - &vm_pages[0]);
6085
6086 if (mem != tmem) {
6087 /*
6088 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6089 * so move it back to its real location
6090 */
6091 *tmem = *mem;
6092 mem = tmem;
6093 }
6094 if (mem->tabled)
6095 hibernate_hash_insert_page(mem);
6096 /*
6097 * the 'hole' between this vm_page_t and the previous
6098 * vm_page_t we moved needs to be initialized as
6099 * a range of free vm_page_t's
6100 */
6101 hibernate_free_range(sindx + 1, eindx);
6102
6103 eindx = sindx;
6104 }
6105 if (sindx)
6106 hibernate_free_range(0, sindx);
6107
6108 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6109
6110 /*
6111 * process the list of vm_page_t's that were tabled in the hash,
6112 * but were not located in the vm_pages array... these are
6113 * vm_page_t's that were created on the fly (i.e. fictitious)
6114 */
6115 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6116 mem_next = mem->next;
6117
6118 mem->next = NULL;
6119 hibernate_hash_insert_page(mem);
6120 }
6121 hibernate_rebuild_hash_list = NULL;
6122
6123 clock_get_uptime(&endTime);
6124 SUB_ABSOLUTETIME(&endTime, &startTime);
6125 absolutetime_to_nanoseconds(endTime, &nsec);
6126
6127 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6128
6129 hibernate_rebuild_needed = FALSE;
6130
6131 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6132 }
6133
6134
6135 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6136
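/*
 *	Routine:	hibernate_teardown_vm_structs
 *	Purpose:
 *		Compact vm_pages[] before the image is written: free entries
 *		are pulled off their free queues and overwritten by later
 *		in-use entries (each of which records its original slot in
 *		"next"), so that only entries up to
 *		hibernate_teardown_last_valid_compact_indx need to be
 *		preserved.  Hash-tabled pages that live outside vm_pages[] are
 *		stashed on hibernate_rebuild_hash_list.  The page hash buckets,
 *		the tail of vm_pages[] and the pmap structures are marked as
 *		unneeded; returns the total number of pages so marked.
 */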
6137 uint32_t
6138 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6139 {
6140 unsigned int i;
6141 unsigned int compact_target_indx;
6142 vm_page_t mem, mem_next;
6143 vm_page_bucket_t *bucket;
6144 unsigned int mark_as_unneeded_pages = 0;
6145 unsigned int unneeded_vm_page_bucket_pages = 0;
6146 unsigned int unneeded_vm_pages_pages = 0;
6147 unsigned int unneeded_pmap_pages = 0;
6148 addr64_t start_of_unneeded = 0;
6149 addr64_t end_of_unneeded = 0;
6150
6151
6152 if (hibernate_should_abort())
6153 return (0);
6154
6155 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6156 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6157 vm_page_cleaned_count, compressor_object->resident_page_count);
6158
6159 for (i = 0; i < vm_page_bucket_count; i++) {
6160
6161 bucket = &vm_page_buckets[i];
6162
6163 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {
6164
6165 assert(mem->tabled);
6166
6167 mem_next = mem->next;
6168
6169 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6170 mem->next = hibernate_rebuild_hash_list;
6171 hibernate_rebuild_hash_list = mem;
6172 }
6173 }
6174 }
6175 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6176 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6177
6178 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6179
6180 compact_target_indx = 0;
6181
6182 for (i = 0; i < vm_pages_count; i++) {
6183
6184 mem = &vm_pages[i];
6185
6186 if (mem->free) {
6187 unsigned int color;
6188
6189 assert(mem->busy);
6190 assert(!mem->lopage);
6191
6192 color = mem->phys_page & vm_color_mask;
6193
6194 queue_remove(&vm_page_queue_free[color],
6195 mem,
6196 vm_page_t,
6197 pageq);
6198 mem->pageq.next = NULL;
6199 mem->pageq.prev = NULL;
6200
6201 vm_page_free_count--;
6202
6203 hibernate_teardown_found_free_pages++;
6204
6205 if ( !vm_pages[compact_target_indx].free)
6206 compact_target_indx = i;
6207 } else {
6208 /*
6209 * record this vm_page_t's original location;
6210 * we need this even if it doesn't get moved,
6211 * as an indicator to the rebuild function that
6212 * it doesn't have to be moved back
6213 */
6214 mem->next = mem;
6215
6216 if (vm_pages[compact_target_indx].free) {
6217 /*
6218 * we've got a hole to fill, so
6219 * move this vm_page_t to its new home
6220 */
6221 vm_pages[compact_target_indx] = *mem;
6222 mem->free = TRUE;
6223
6224 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6225 compact_target_indx++;
6226 } else
6227 hibernate_teardown_last_valid_compact_indx = i;
6228 }
6229 }
6230 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6231 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6232 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6233
6234 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6235
6236 if (start_of_unneeded) {
6237 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6238 mark_as_unneeded_pages += unneeded_pmap_pages;
6239 }
6240 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6241
6242 hibernate_rebuild_needed = TRUE;
6243
6244 return (mark_as_unneeded_pages);
6245 }
6246
6247
6248 #endif /* HIBERNATION */
6249
6250 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6251
6252 #include <mach_vm_debug.h>
6253 #if MACH_VM_DEBUG
6254
6255 #include <mach_debug/hash_info.h>
6256 #include <vm/vm_debug.h>
6257
6258 /*
6259 * Routine: vm_page_info
6260 * Purpose:
6261 * Return information about the global VP table.
6262 * Fills the buffer with as much information as possible
6263 * and returns the desired size of the buffer.
6264 * Conditions:
6265 * Nothing locked. The caller should provide
6266 * possibly-pageable memory.
6267 */
6268
6269 unsigned int
6270 vm_page_info(
6271 hash_info_bucket_t *info,
6272 unsigned int count)
6273 {
6274 unsigned int i;
6275 lck_spin_t *bucket_lock;
6276
6277 if (vm_page_bucket_count < count)
6278 count = vm_page_bucket_count;
6279
6280 for (i = 0; i < count; i++) {
6281 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6282 unsigned int bucket_count = 0;
6283 vm_page_t m;
6284
6285 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6286 lck_spin_lock(bucket_lock);
6287
6288 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
6289 bucket_count++;
6290
6291 lck_spin_unlock(bucket_lock);
6292
6293 /* don't touch pageable memory while holding locks */
6294 info[i].hib_count = bucket_count;
6295 }
6296
6297 return vm_page_bucket_count;
6298 }
6299 #endif /* MACH_VM_DEBUG */