1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <ppc/mappings.h> /* (BRINGUP) */
89 #include <pexpert/pexpert.h> /* (BRINGUP) */
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <sys/kdebug.h>
103
104 boolean_t vm_page_free_verify = TRUE;
105
106 int speculative_age_index = 0;
107 int speculative_steal_index = 0;
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
113
114
115 __private_extern__ void vm_page_init_lck_grp(void);
116
117 static void vm_page_free_prepare(vm_page_t page);
118
119
120
121 /*
122 * Associated with each page of user-allocatable memory is a
123 * page structure.
124 */
125
126 /*
127 * These variables record the values returned by vm_page_bootstrap,
128 * for debugging purposes. The implementation of pmap_steal_memory
129 * and pmap_startup here also uses them internally.
130 */
131
132 vm_offset_t virtual_space_start;
133 vm_offset_t virtual_space_end;
134 int vm_page_pages;
135
136 /*
137 * The vm_page_lookup() routine, which provides for fast
138 * (virtual memory object, offset) to page lookup, employs
139 * the following hash table. The vm_page_{insert,remove}
140 * routines install and remove associations in the table.
141 * [This table is often called the virtual-to-physical,
142 * or VP, table.]
143 */
144 typedef struct {
145 vm_page_t pages;
146 #if MACH_PAGE_HASH_STATS
147 int cur_count; /* current count */
148 int hi_count; /* high water mark */
149 #endif /* MACH_PAGE_HASH_STATS */
150 } vm_page_bucket_t;
151
152
153 #define BUCKETS_PER_LOCK 16
154
155 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
156 unsigned int vm_page_bucket_count = 0; /* How big is array? */
157 unsigned int vm_page_hash_mask; /* Mask for hash function */
158 unsigned int vm_page_hash_shift; /* Shift for hash function */
159 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
160 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
161
162 lck_spin_t *vm_page_bucket_locks;
163
164
165 #if MACH_PAGE_HASH_STATS
166 /* This routine is only for debugging. It is intended to be called by
167 * hand by a developer using a kernel debugger. This routine prints
168 * out vm_page_hash table statistics to the kernel debug console.
169 */
170 void
171 hash_debug(void)
172 {
173 int i;
174 int numbuckets = 0;
175 int highsum = 0;
176 int maxdepth = 0;
177
178 for (i = 0; i < vm_page_bucket_count; i++) {
179 if (vm_page_buckets[i].hi_count) {
180 numbuckets++;
181 highsum += vm_page_buckets[i].hi_count;
182 if (vm_page_buckets[i].hi_count > maxdepth)
183 maxdepth = vm_page_buckets[i].hi_count;
184 }
185 }
186 printf("Total number of buckets: %d\n", vm_page_bucket_count);
187 printf("Number used buckets: %d = %d%%\n",
188 numbuckets, 100*numbuckets/vm_page_bucket_count);
189 printf("Number unused buckets: %d = %d%%\n",
190 vm_page_bucket_count - numbuckets,
191 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
192 printf("Sum of bucket max depth: %d\n", highsum);
193 printf("Average bucket depth: %d.%2d\n",
194 highsum/vm_page_bucket_count,
195 highsum%vm_page_bucket_count);
196 printf("Maximum bucket depth: %d\n", maxdepth);
197 }
198 #endif /* MACH_PAGE_HASH_STATS */
199
200 /*
201 * The virtual page size is currently implemented as a runtime
202 * variable, but is constant once initialized using vm_set_page_size.
203 * This initialization must be done in the machine-dependent
204 * bootstrap sequence, before calling other machine-independent
205 * initializations.
206 *
207 * All references to the virtual page size outside this
208 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
209 * constants.
210 */
211 vm_size_t page_size = PAGE_SIZE;
212 vm_size_t page_mask = PAGE_MASK;
213 int page_shift = PAGE_SHIFT;
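
/*
 * A minimal sketch of how page_size, page_mask and page_shift relate once
 * vm_set_page_size() has run: with a 4K page, page_shift is 12 and
 * page_mask is 0xFFF, so page-boundary arithmetic is simple masking.
 * The example_* helpers are hypothetical names that merely mirror what the
 * trunc_page()/round_page() macros already provide.
 */
#if 0	/* illustrative example only */
static vm_offset_t
example_trunc_page(vm_offset_t addr)
{
	/* clear the offset-within-page bits */
	return (addr & ~((vm_offset_t) page_mask));
}

static vm_offset_t
example_round_page(vm_offset_t addr)
{
	/* round up to the next page boundary */
	return ((addr + page_mask) & ~((vm_offset_t) page_mask));
}
#endif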
214
215 /*
216 * Resident page structures are initialized from
217 * a template (see vm_page_alloc).
218 *
219 * When adding a new field to the virtual memory
220 * object structure, be sure to add initialization
221 * (see vm_page_bootstrap).
222 */
223 struct vm_page vm_page_template;
224
225 vm_page_t vm_pages = VM_PAGE_NULL;
226 unsigned int vm_pages_count = 0;
227
228 /*
229 * Resident pages that represent real memory
230 * are allocated from a set of free lists,
231 * one per color.
232 */
233 unsigned int vm_colors;
234 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
235 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
236 queue_head_t vm_page_queue_free[MAX_COLORS];
237 vm_page_t vm_page_queue_fictitious;
238 unsigned int vm_page_free_wanted;
239 unsigned int vm_page_free_wanted_privileged;
240 unsigned int vm_page_free_count;
241 unsigned int vm_page_fictitious_count;
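
/*
 * A minimal sketch of how a page's free-list color is chosen: the color is
 * taken from the low bits of the physical page number (vm_color_mask is
 * vm_colors - 1, and vm_colors is a power of 2), so physically consecutive
 * pages spread across the per-color free queues.  example_free_queue_color()
 * is a hypothetical helper; the same expression appears inline in
 * vm_page_release() below.
 */
#if 0	/* illustrative example only */
static unsigned int
example_free_queue_color(vm_page_t mem)
{
	return (mem->phys_page & vm_color_mask);
}
#endif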
242
243 unsigned int vm_page_free_count_minimum; /* debugging */
244
245 /*
246 * Occasionally, the virtual memory system uses
247 * resident page structures that do not refer to
248 * real pages, for example to leave a page with
249 * important state information in the VP table.
250 *
251 * These page structures are allocated the way
252 * most other kernel structures are.
253 */
254 zone_t vm_page_zone;
255 vm_locks_array_t vm_page_locks;
256 decl_lck_mtx_data(,vm_page_alloc_lock)
257 unsigned int io_throttle_zero_fill;
258
259 unsigned int vm_page_local_q_count = 0;
260 unsigned int vm_page_local_q_soft_limit = 250;
261 unsigned int vm_page_local_q_hard_limit = 500;
262 struct vplq *vm_page_local_q = NULL;
263
264 /*
265 * Fictitious pages don't have a physical address,
266 * but we must initialize phys_page to something.
267 * For debugging, this should be a strange value
268 * that the pmap module can recognize in assertions.
269 */
270 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
271
272 /*
273 * Guard pages are not accessible so they don't
274 * need a physical address, but we need to enter
275 * one in the pmap.
276 * Let's make it recognizable and make sure that
277 * we don't use a real physical page with that
278 * physical address.
279 */
280 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
281
282 /*
283 * Resident page structures are also chained on
284 * queues that are used by the page replacement
285 * system (pageout daemon). These queues are
286 * defined here, but are shared by the pageout
287 * module. The inactive queue is broken into
288 * inactive and zf for convenience as the
289 * pageout daemon often assigns a higher
290 * affinity to zf pages.
291 */
292 queue_head_t vm_page_queue_active;
293 queue_head_t vm_page_queue_inactive;
294 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
295 queue_head_t vm_page_queue_throttled;
296
297 unsigned int vm_page_active_count;
298 unsigned int vm_page_inactive_count;
299 unsigned int vm_page_throttled_count;
300 unsigned int vm_page_speculative_count;
301 unsigned int vm_page_wire_count;
302 unsigned int vm_page_gobble_count = 0;
303 unsigned int vm_page_wire_count_warning = 0;
304 unsigned int vm_page_gobble_count_warning = 0;
305
306 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
307 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
308 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
309
310 #if DEVELOPMENT || DEBUG
311 unsigned int vm_page_speculative_recreated = 0;
312 unsigned int vm_page_speculative_created = 0;
313 unsigned int vm_page_speculative_used = 0;
314 #endif
315
316 ppnum_t vm_lopage_poolstart = 0;
317 ppnum_t vm_lopage_poolend = 0;
318 int vm_lopage_poolsize = 0;
319 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
320
321
322 /*
323 * Several page replacement parameters are also
324 * shared with this module, so that page allocation
325 * (done here in vm_page_alloc) can trigger the
326 * pageout daemon.
327 */
328 unsigned int vm_page_free_target = 0;
329 unsigned int vm_page_free_min = 0;
330 unsigned int vm_page_throttle_limit = 0;
331 uint32_t vm_page_creation_throttle = 0;
332 unsigned int vm_page_inactive_target = 0;
333 unsigned int vm_page_inactive_min = 0;
334 unsigned int vm_page_free_reserved = 0;
335 unsigned int vm_page_throttle_count = 0;
336
337 /*
338 * The VM system has a couple of heuristics for deciding
339 * that pages are "uninteresting" and should be placed
340 * on the inactive queue as likely candidates for replacement.
341 * These variables let the heuristics be controlled at run-time
342 * to make experimentation easier.
343 */
344
345 boolean_t vm_page_deactivate_hint = TRUE;
346
347 struct vm_page_stats_reusable vm_page_stats_reusable;
348
349 /*
350 * vm_set_page_size:
351 *
352 * Sets the page size, perhaps based upon the memory
353 * size. Must be called before any use of page-size
354 * dependent functions.
355 *
356 * Sets page_shift and page_mask from page_size.
357 */
358 void
359 vm_set_page_size(void)
360 {
361 page_mask = page_size - 1;
362
363 if ((page_mask & page_size) != 0)
364 panic("vm_set_page_size: page size not a power of two");
365
366 for (page_shift = 0; ; page_shift++)
367 if ((1U << page_shift) == page_size)
368 break;
369 }
370
371
372 /* Called once during startup, once the cache geometry is known.
373 */
374 static void
375 vm_page_set_colors( void )
376 {
377 unsigned int n, override;
378
379 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
380 n = override;
381 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
382 n = vm_cache_geometry_colors;
383 else n = DEFAULT_COLORS; /* use default if all else fails */
384
385 if ( n == 0 )
386 n = 1;
387 if ( n > MAX_COLORS )
388 n = MAX_COLORS;
389
390 /* the count must be a power of 2 */
391 if ( ( n & (n - 1)) != 0 )
392 panic("vm_page_set_colors");
393
394 vm_colors = n;
395 vm_color_mask = n - 1;
396 }
397
398
399 lck_grp_t vm_page_lck_grp_free;
400 lck_grp_t vm_page_lck_grp_queue;
401 lck_grp_t vm_page_lck_grp_local;
402 lck_grp_t vm_page_lck_grp_purge;
403 lck_grp_t vm_page_lck_grp_alloc;
404 lck_grp_t vm_page_lck_grp_bucket;
405 lck_grp_attr_t vm_page_lck_grp_attr;
406 lck_attr_t vm_page_lck_attr;
407
408
409 __private_extern__ void
410 vm_page_init_lck_grp(void)
411 {
412 /*
413 * initialize the vm_page lock world
414 */
415 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
416 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
417 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
418 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
419 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
422 lck_attr_setdefault(&vm_page_lck_attr);
423 }
424
425 void
426 vm_page_init_local_q()
427 {
428 unsigned int num_cpus;
429 unsigned int i;
430 struct vplq *t_local_q;
431
432 num_cpus = ml_get_max_cpus();
433
434 /*
435 * no point in this for a uni-processor system
436 */
437 if (num_cpus >= 2) {
438 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
439
440 for (i = 0; i < num_cpus; i++) {
441 struct vpl *lq;
442
443 lq = &t_local_q[i].vpl_un.vpl;
444 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
445 queue_init(&lq->vpl_queue);
446 lq->vpl_count = 0;
447 }
448 vm_page_local_q_count = num_cpus;
449
450 vm_page_local_q = (struct vplq *)t_local_q;
451 }
452 }
453
454
455 /*
456 * vm_page_bootstrap:
457 *
458 * Initializes the resident memory module.
459 *
460 * Allocates memory for the page cells, and
461 * for the object/offset-to-page hash table headers.
462 * Each page cell is initialized and placed on the free list.
463 * Returns the range of available kernel virtual memory.
464 */
465
466 void
467 vm_page_bootstrap(
468 vm_offset_t *startp,
469 vm_offset_t *endp)
470 {
471 register vm_page_t m;
472 unsigned int i;
473 unsigned int log1;
474 unsigned int log2;
475 unsigned int size;
476
477 /*
478 * Initialize the vm_page template.
479 */
480
481 m = &vm_page_template;
482 bzero(m, sizeof (*m));
483
484 m->pageq.next = NULL;
485 m->pageq.prev = NULL;
486 m->listq.next = NULL;
487 m->listq.prev = NULL;
488 m->next = VM_PAGE_NULL;
489
490 m->object = VM_OBJECT_NULL; /* reset later */
491 m->offset = (vm_object_offset_t) -1; /* reset later */
492
493 m->wire_count = 0;
494 m->local = FALSE;
495 m->inactive = FALSE;
496 m->active = FALSE;
497 m->pageout_queue = FALSE;
498 m->speculative = FALSE;
499 m->laundry = FALSE;
500 m->free = FALSE;
501 m->reference = FALSE;
502 m->gobbled = FALSE;
503 m->private = FALSE;
504 m->throttled = FALSE;
505 m->__unused_pageq_bits = 0;
506
507 m->phys_page = 0; /* reset later */
508
509 m->busy = TRUE;
510 m->wanted = FALSE;
511 m->tabled = FALSE;
512 m->fictitious = FALSE;
513 m->pmapped = FALSE;
514 m->wpmapped = FALSE;
515 m->pageout = FALSE;
516 m->absent = FALSE;
517 m->error = FALSE;
518 m->dirty = FALSE;
519 m->cleaning = FALSE;
520 m->precious = FALSE;
521 m->clustered = FALSE;
522 m->overwriting = FALSE;
523 m->restart = FALSE;
524 m->unusual = FALSE;
525 m->encrypted = FALSE;
526 m->encrypted_cleaning = FALSE;
527 m->list_req_pending = FALSE;
528 m->dump_cleaning = FALSE;
529 m->cs_validated = FALSE;
530 m->cs_tainted = FALSE;
531 m->no_cache = FALSE;
532 m->zero_fill = FALSE;
533 m->reusable = FALSE;
534 m->__unused_object_bits = 0;
535
536
537 /*
538 * Initialize the page queues.
539 */
540 vm_page_init_lck_grp();
541
542 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
543 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
544 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
545
546 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
547 int group;
548
549 purgeable_queues[i].token_q_head = 0;
550 purgeable_queues[i].token_q_tail = 0;
551 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
552 queue_init(&purgeable_queues[i].objq[group]);
553
554 purgeable_queues[i].type = i;
555 purgeable_queues[i].new_pages = 0;
556 #if MACH_ASSERT
557 purgeable_queues[i].debug_count_tokens = 0;
558 purgeable_queues[i].debug_count_objects = 0;
559 #endif
560 };
561
562 for (i = 0; i < MAX_COLORS; i++ )
563 queue_init(&vm_page_queue_free[i]);
564 queue_init(&vm_lopage_queue_free);
565 vm_page_queue_fictitious = VM_PAGE_NULL;
566 queue_init(&vm_page_queue_active);
567 queue_init(&vm_page_queue_inactive);
568 queue_init(&vm_page_queue_throttled);
569 queue_init(&vm_page_queue_zf);
570
571 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
572 queue_init(&vm_page_queue_speculative[i].age_q);
573
574 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
575 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
576 }
577 vm_page_free_wanted = 0;
578 vm_page_free_wanted_privileged = 0;
579
580 vm_page_set_colors();
581
582
583 /*
584 * Steal memory for the map and zone subsystems.
585 */
586
587 vm_map_steal_memory();
588 zone_steal_memory();
589
590 /*
591 * Allocate (and initialize) the virtual-to-physical
592 * table hash buckets.
593 *
594 * The number of buckets should be a power of two to
595 * get a good hash function. The following computation
596 * chooses the first power of two that is greater
597 * than the number of physical pages in the system.
598 */
599
600 if (vm_page_bucket_count == 0) {
601 unsigned int npages = pmap_free_pages();
602
603 vm_page_bucket_count = 1;
604 while (vm_page_bucket_count < npages)
605 vm_page_bucket_count <<= 1;
606 }
607 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
608
609 vm_page_hash_mask = vm_page_bucket_count - 1;
610
611 /*
612 * Calculate object shift value for hashing algorithm:
613 * O = log2(sizeof(struct vm_object))
614 * B = log2(vm_page_bucket_count)
615 * hash shifts the object left by
616 * B/2 - O
617 */
618 size = vm_page_bucket_count;
619 for (log1 = 0; size > 1; log1++)
620 size /= 2;
621 size = sizeof(struct vm_object);
622 for (log2 = 0; size > 1; log2++)
623 size /= 2;
624 vm_page_hash_shift = log1/2 - log2 + 1;
625
626 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
627 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
628 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
629
630 if (vm_page_hash_mask & vm_page_bucket_count)
631 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
632
633 vm_page_buckets = (vm_page_bucket_t *)
634 pmap_steal_memory(vm_page_bucket_count *
635 sizeof(vm_page_bucket_t));
636
637 vm_page_bucket_locks = (lck_spin_t *)
638 pmap_steal_memory(vm_page_bucket_lock_count *
639 sizeof(lck_spin_t));
640
641 for (i = 0; i < vm_page_bucket_count; i++) {
642 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
643
644 bucket->pages = VM_PAGE_NULL;
645 #if MACH_PAGE_HASH_STATS
646 bucket->cur_count = 0;
647 bucket->hi_count = 0;
648 #endif /* MACH_PAGE_HASH_STATS */
649 }
650
651 for (i = 0; i < vm_page_bucket_lock_count; i++)
652 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
653
654 /*
655 * Machine-dependent code allocates the resident page table.
656 * It uses vm_page_init to initialize the page frames.
657 * The code also returns to us the virtual space available
658 * to the kernel. We don't trust the pmap module
659 * to get the alignment right.
660 */
661
662 pmap_startup(&virtual_space_start, &virtual_space_end);
663 virtual_space_start = round_page(virtual_space_start);
664 virtual_space_end = trunc_page(virtual_space_end);
665
666 *startp = virtual_space_start;
667 *endp = virtual_space_end;
668
669 /*
670 * Compute the initial "wire" count.
671 * Up until now, the pages which have been set aside are not under
672 * the VM system's control, so although they aren't explicitly
673 * wired, they nonetheless can't be moved. At this moment,
674 * all VM managed pages are "free", courtesy of pmap_startup.
675 */
676 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
677 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count; /* initial value */
678 vm_page_free_count_minimum = vm_page_free_count;
679
680 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
681 vm_page_free_count, vm_page_wire_count);
682
683 simple_lock_init(&vm_paging_lock, 0);
684 }
685
686 #ifndef MACHINE_PAGES
687 /*
688 * We implement pmap_steal_memory and pmap_startup with the help
689 * of two simpler functions, pmap_virtual_space and pmap_next_page.
690 */
691
692 void *
693 pmap_steal_memory(
694 vm_size_t size)
695 {
696 vm_offset_t addr, vaddr;
697 ppnum_t phys_page;
698
699 /*
700 * We round the size up to an integral multiple of the pointer size.
701 */
702
703 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
704
705 /*
706 * If this is the first call to pmap_steal_memory,
707 * we have to initialize ourself.
708 */
709
710 if (virtual_space_start == virtual_space_end) {
711 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
712
713 /*
714 * The initial values must be aligned properly, and
715 * we don't trust the pmap module to do it right.
716 */
717
718 virtual_space_start = round_page(virtual_space_start);
719 virtual_space_end = trunc_page(virtual_space_end);
720 }
721
722 /*
723 * Allocate virtual memory for this request.
724 */
725
726 addr = virtual_space_start;
727 virtual_space_start += size;
728
729 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
730
731 /*
732 * Allocate and map physical pages to back new virtual pages.
733 */
734
735 for (vaddr = round_page(addr);
736 vaddr < addr + size;
737 vaddr += PAGE_SIZE) {
738 #if defined(__LP64__)
739 if (!pmap_next_page_k64(&phys_page))
740 #else
741 if (!pmap_next_page(&phys_page))
742 #endif
743
744 panic("pmap_steal_memory");
745
746 /*
747 * XXX Logically, these mappings should be wired,
748 * but some pmap modules barf if they are.
749 */
750 #if defined(__LP64__)
751 pmap_pre_expand(kernel_pmap, vaddr);
752 #endif
753
754 pmap_enter(kernel_pmap, vaddr, phys_page,
755 VM_PROT_READ|VM_PROT_WRITE,
756 VM_WIMG_USE_DEFAULT, FALSE);
757 /*
758 * Account for newly stolen memory
759 */
760 vm_page_wire_count++;
761
762 }
763
764 return (void *) addr;
765 }
766
767 void
768 pmap_startup(
769 vm_offset_t *startp,
770 vm_offset_t *endp)
771 {
772 unsigned int i, npages, pages_initialized, fill, fillval;
773 ppnum_t phys_page;
774 addr64_t tmpaddr;
775 unsigned int num_of_lopages = 0;
776 unsigned int last_index;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages will fit, counting the space needed for the vm_page_ts themselves */
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795
796 vm_page_init(&vm_pages[i], phys_page);
797 vm_page_pages++;
798 pages_initialized++;
799 }
800 vm_pages_count = pages_initialized;
801
802 /*
803 * Check if we want to initialize pages to a known value
804 */
805 fill = 0; /* Assume no fill */
806 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
807
808
809 /*
810 * if vm_lopage_poolsize is non-zero, then we need to reserve
811 * a pool of pages whose addresses are less than 4G... this pool
812 * is used by drivers whose hardware can't DMA beyond 32 bits...
813 *
814 * note that I'm assuming that the page list is ascending and
815 * ordered with respect to the physical address
816 */
817 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
818 vm_page_t m;
819
820 m = &vm_pages[i];
821
822 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
823 panic("couldn't reserve the lopage pool: not enough lo pages\n");
824
825 if (m->phys_page < vm_lopage_poolend)
826 panic("couldn't reserve the lopage pool: page list out of order\n");
827
828 vm_lopage_poolend = m->phys_page;
829
830 if (vm_lopage_poolstart == 0)
831 vm_lopage_poolstart = m->phys_page;
832 else {
833 if (m->phys_page < vm_lopage_poolstart)
834 panic("couldn't reserve the lopage pool: page list out of order\n");
835 }
836
837 if (fill)
838 fillPage(m->phys_page, fillval); /* Fill the page with a known value if requested at boot */
839
840 vm_page_release(m);
841 }
842 last_index = i;
843
844 // -debug code remove
845 if (2 == vm_himemory_mode) {
846 // free low -> high so high is preferred
847 for (i = last_index + 1; i <= pages_initialized; i++) {
848 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
849 vm_page_release(&vm_pages[i - 1]);
850 }
851 }
852 else
853 // debug code remove-
854
855 /*
856 * Release pages in reverse order so that physical pages
857 * initially get allocated in ascending addresses. This keeps
858 * the devices (which must address physical memory) happy if
859 * they require several consecutive pages.
860 */
861 for (i = pages_initialized; i > last_index; i--) {
862 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
863 vm_page_release(&vm_pages[i - 1]);
864 }
865
866 #if 0
867 {
868 vm_page_t xx, xxo, xxl;
869 int i, j, k, l;
870
871 j = 0; /* (BRINGUP) */
872 xxl = 0;
873
874 for( i = 0; i < vm_colors; i++ ) {
875 queue_iterate(&vm_page_queue_free[i],
876 xx,
877 vm_page_t,
878 pageq) { /* BRINGUP */
879 j++; /* (BRINGUP) */
880 if(j > vm_page_free_count) { /* (BRINGUP) */
881 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
882 }
883
884 l = vm_page_free_count - j; /* (BRINGUP) */
885 k = 0; /* (BRINGUP) */
886
887 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
888
889 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
890 k++;
891 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
892 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
893 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
894 }
895 }
896
897 xxl = xx;
898 }
899 }
900
901 if(j != vm_page_free_count) { /* (BRINGUP) */
902 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
903 }
904 }
905 #endif
906
907
908 /*
909 * We have to re-align virtual_space_start,
910 * because pmap_steal_memory has been using it.
911 */
912
913 virtual_space_start = round_page(virtual_space_start);
914
915 *startp = virtual_space_start;
916 *endp = virtual_space_end;
917 }
918 #endif /* MACHINE_PAGES */
919
920 /*
921 * Routine: vm_page_module_init
922 * Purpose:
923 * Second initialization pass, to be done after
924 * the basic VM system is ready.
925 */
926 void
927 vm_page_module_init(void)
928 {
929 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
930 0, PAGE_SIZE, "vm pages");
931
932 #if ZONE_DEBUG
933 zone_debug_disable(vm_page_zone);
934 #endif /* ZONE_DEBUG */
935
936 zone_change(vm_page_zone, Z_EXPAND, FALSE);
937 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
938 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
939
940 /*
941 * Adjust zone statistics to account for the real pages allocated
942 * in vm_page_create(). [Q: is this really what we want?]
943 */
944 vm_page_zone->count += vm_page_pages;
945 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
946
947 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
948 }
949
950 /*
951 * Routine: vm_page_create
952 * Purpose:
953 * After the VM system is up, machine-dependent code
954 * may stumble across more physical memory, for example
955 * memory that it was reserving for a frame buffer.
956 * vm_page_create turns this memory into available pages.
957 */
958
959 void
960 vm_page_create(
961 ppnum_t start,
962 ppnum_t end)
963 {
964 ppnum_t phys_page;
965 vm_page_t m;
966
967 for (phys_page = start;
968 phys_page < end;
969 phys_page++) {
970 while ((m = (vm_page_t) vm_page_grab_fictitious())
971 == VM_PAGE_NULL)
972 vm_page_more_fictitious();
973
974 vm_page_init(m, phys_page);
975 vm_page_pages++;
976 vm_page_release(m);
977 }
978 }
979
980 /*
981 * vm_page_hash:
982 *
983 * Distributes the object/offset key pair among hash buckets.
984 *
985 * NOTE: The bucket count must be a power of 2
986 */
987 #define vm_page_hash(object, offset) (\
988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
989 & vm_page_hash_mask)
990
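
/*
 * A minimal usage sketch for vm_page_hash(): the pattern used throughout
 * this file to go from an (object, offset) pair to its hash bucket and to
 * the spin lock that covers it (one lock per BUCKETS_PER_LOCK buckets).
 * example_bucket_for() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_bucket_t *
example_bucket_for(
	vm_object_t		object,
	vm_object_offset_t	offset,
	lck_spin_t		**bucket_lockp)
{
	int	hash_id = vm_page_hash(object, offset);

	*bucket_lockp = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
	return (&vm_page_buckets[hash_id]);
}
#endif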
991
992 /*
993 * vm_page_insert: [ internal use only ]
994 *
995 * Inserts the given mem entry into the object/offset-page
996 * table and object list.
997 *
998 * The object must be locked.
999 */
1000 void
1001 vm_page_insert(
1002 vm_page_t mem,
1003 vm_object_t object,
1004 vm_object_offset_t offset)
1005 {
1006 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
1007 }
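
/*
 * Caller-side sketch: vm_page_insert() asserts that the object is locked
 * exclusively, so a typical caller brackets the insertion with the object
 * lock.  This assumes the vm_object_lock()/vm_object_unlock() interfaces
 * declared elsewhere in the VM code; example_insert_page() is a
 * hypothetical helper name.
 */
#if 0	/* illustrative example only */
static void
example_insert_page(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_object_lock(object);
	vm_page_insert(mem, object, offset);
	vm_object_unlock(object);
}
#endif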
1008
1009 void
1010 vm_page_insert_internal(
1011 vm_page_t mem,
1012 vm_object_t object,
1013 vm_object_offset_t offset,
1014 boolean_t queues_lock_held,
1015 boolean_t insert_in_hash)
1016 {
1017 vm_page_bucket_t *bucket;
1018 lck_spin_t *bucket_lock;
1019 int hash_id;
1020
1021 XPR(XPR_VM_PAGE,
1022 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1023 object, offset, mem, 0,0);
1024
1025 VM_PAGE_CHECK(mem);
1026
1027 if (object == vm_submap_object) {
1028 /* the vm_submap_object is only a placeholder for submaps */
1029 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1030 }
1031
1032 vm_object_lock_assert_exclusive(object);
1033 #if DEBUG
1034 lck_mtx_assert(&vm_page_queue_lock,
1035 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1036 : LCK_MTX_ASSERT_NOTOWNED);
1037 #endif /* DEBUG */
1038
1039 if (insert_in_hash == TRUE) {
1040 #if DEBUG
1041 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1042 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1043 "already in (obj=%p,off=0x%llx)",
1044 mem, object, offset, mem->object, mem->offset);
1045 #endif
1046 assert(!object->internal || offset < object->size);
1047
1048 /* only insert "pageout" pages into "pageout" objects,
1049 * and normal pages into normal objects */
1050 assert(object->pageout == mem->pageout);
1051
1052 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1053
1054 /*
1055 * Record the object/offset pair in this page
1056 */
1057
1058 mem->object = object;
1059 mem->offset = offset;
1060
1061 /*
1062 * Insert it into the object/offset hash table
1063 */
1064 hash_id = vm_page_hash(object, offset);
1065 bucket = &vm_page_buckets[hash_id];
1066 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1067
1068 lck_spin_lock(bucket_lock);
1069
1070 mem->next = bucket->pages;
1071 bucket->pages = mem;
1072 #if MACH_PAGE_HASH_STATS
1073 if (++bucket->cur_count > bucket->hi_count)
1074 bucket->hi_count = bucket->cur_count;
1075 #endif /* MACH_PAGE_HASH_STATS */
1076
1077 lck_spin_unlock(bucket_lock);
1078 }
1079 /*
1080 * Now link into the object's list of backed pages.
1081 */
1082
1083 VM_PAGE_INSERT(mem, object);
1084 mem->tabled = TRUE;
1085
1086 /*
1087 * Show that the object has one more resident page.
1088 */
1089
1090 object->resident_page_count++;
1091 if (VM_PAGE_WIRED(mem)) {
1092 object->wired_page_count++;
1093 }
1094 assert(object->resident_page_count >= object->wired_page_count);
1095
1096 assert(!mem->reusable);
1097
1098 if (object->purgable == VM_PURGABLE_VOLATILE) {
1099 if (VM_PAGE_WIRED(mem)) {
1100 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1101 } else {
1102 OSAddAtomic(1, &vm_page_purgeable_count);
1103 }
1104 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1105 mem->throttled) {
1106 /*
1107 * This page belongs to a purged VM object but hasn't
1108 * been purged (because it was "busy").
1109 * It's in the "throttled" queue and hence not
1110 * visible to vm_pageout_scan(). Move it to a pageable
1111 * queue, so that it can eventually be reclaimed, instead
1112 * of lingering in the "empty" object.
1113 */
1114 if (queues_lock_held == FALSE)
1115 vm_page_lockspin_queues();
1116 vm_page_deactivate(mem);
1117 if (queues_lock_held == FALSE)
1118 vm_page_unlock_queues();
1119 }
1120 }
1121
1122 /*
1123 * vm_page_replace:
1124 *
1125 * Exactly like vm_page_insert, except that we first
1126 * remove any existing page at the given offset in object.
1127 *
1128 * The object must be locked.
1129 */
1130 void
1131 vm_page_replace(
1132 register vm_page_t mem,
1133 register vm_object_t object,
1134 register vm_object_offset_t offset)
1135 {
1136 vm_page_bucket_t *bucket;
1137 vm_page_t found_m = VM_PAGE_NULL;
1138 lck_spin_t *bucket_lock;
1139 int hash_id;
1140
1141 VM_PAGE_CHECK(mem);
1142 vm_object_lock_assert_exclusive(object);
1143 #if DEBUG
1144 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1145 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1146 "already in (obj=%p,off=0x%llx)",
1147 mem, object, offset, mem->object, mem->offset);
1148 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1149 #endif
1150 /*
1151 * Record the object/offset pair in this page
1152 */
1153
1154 mem->object = object;
1155 mem->offset = offset;
1156
1157 /*
1158 * Insert it into the object/offset hash table,
1159 * replacing any page that might have been there.
1160 */
1161
1162 hash_id = vm_page_hash(object, offset);
1163 bucket = &vm_page_buckets[hash_id];
1164 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1165
1166 lck_spin_lock(bucket_lock);
1167
1168 if (bucket->pages) {
1169 vm_page_t *mp = &bucket->pages;
1170 vm_page_t m = *mp;
1171
1172 do {
1173 if (m->object == object && m->offset == offset) {
1174 /*
1175 * Remove old page from hash list
1176 */
1177 *mp = m->next;
1178
1179 found_m = m;
1180 break;
1181 }
1182 mp = &m->next;
1183 } while ((m = *mp));
1184
1185 mem->next = bucket->pages;
1186 } else {
1187 mem->next = VM_PAGE_NULL;
1188 }
1189 /*
1190 * insert new page at head of hash list
1191 */
1192 bucket->pages = mem;
1193
1194 lck_spin_unlock(bucket_lock);
1195
1196 if (found_m) {
1197 /*
1198 * there was already a page at the specified
1199 * offset for this object... remove it from
1200 * the object and free it back to the free list
1201 */
1202 vm_page_free_unlocked(found_m, FALSE);
1203 }
1204 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1205 }
1206
1207 /*
1208 * vm_page_remove: [ internal use only ]
1209 *
1210 * Removes the given mem entry from the object/offset-page
1211 * table and the object page list.
1212 *
1213 * The object must be locked.
1214 */
1215
1216 void
1217 vm_page_remove(
1218 vm_page_t mem,
1219 boolean_t remove_from_hash)
1220 {
1221 vm_page_bucket_t *bucket;
1222 vm_page_t this;
1223 lck_spin_t *bucket_lock;
1224 int hash_id;
1225
1226 XPR(XPR_VM_PAGE,
1227 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1228 mem->object, mem->offset,
1229 mem, 0,0);
1230
1231 vm_object_lock_assert_exclusive(mem->object);
1232 assert(mem->tabled);
1233 assert(!mem->cleaning);
1234 VM_PAGE_CHECK(mem);
1235
1236 if (remove_from_hash == TRUE) {
1237 /*
1238 * Remove from the object/offset hash table
1239 */
1240 hash_id = vm_page_hash(mem->object, mem->offset);
1241 bucket = &vm_page_buckets[hash_id];
1242 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1243
1244 lck_spin_lock(bucket_lock);
1245
1246 if ((this = bucket->pages) == mem) {
1247 /* optimize for common case */
1248
1249 bucket->pages = mem->next;
1250 } else {
1251 vm_page_t *prev;
1252
1253 for (prev = &this->next;
1254 (this = *prev) != mem;
1255 prev = &this->next)
1256 continue;
1257 *prev = this->next;
1258 }
1259 #if MACH_PAGE_HASH_STATS
1260 bucket->cur_count--;
1261 #endif /* MACH_PAGE_HASH_STATS */
1262
1263 lck_spin_unlock(bucket_lock);
1264 }
1265 /*
1266 * Now remove from the object's list of backed pages.
1267 */
1268
1269 VM_PAGE_REMOVE(mem);
1270
1271 /*
1272 * And show that the object has one fewer resident
1273 * page.
1274 */
1275
1276 assert(mem->object->resident_page_count > 0);
1277 mem->object->resident_page_count--;
1278 if (VM_PAGE_WIRED(mem)) {
1279 assert(mem->object->wired_page_count > 0);
1280 mem->object->wired_page_count--;
1281 }
1282 assert(mem->object->resident_page_count >=
1283 mem->object->wired_page_count);
1284 if (mem->reusable) {
1285 assert(mem->object->reusable_page_count > 0);
1286 mem->object->reusable_page_count--;
1287 assert(mem->object->reusable_page_count <=
1288 mem->object->resident_page_count);
1289 mem->reusable = FALSE;
1290 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1291 vm_page_stats_reusable.reused_remove++;
1292 } else if (mem->object->all_reusable) {
1293 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1294 vm_page_stats_reusable.reused_remove++;
1295 }
1296
1297 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1298 if (VM_PAGE_WIRED(mem)) {
1299 assert(vm_page_purgeable_wired_count > 0);
1300 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1301 } else {
1302 assert(vm_page_purgeable_count > 0);
1303 OSAddAtomic(-1, &vm_page_purgeable_count);
1304 }
1305 }
1306 mem->tabled = FALSE;
1307 mem->object = VM_OBJECT_NULL;
1308 mem->offset = (vm_object_offset_t) -1;
1309 }
1310
1311
1312 /*
1313 * vm_page_lookup:
1314 *
1315 * Returns the page associated with the object/offset
1316 * pair specified; if none is found, VM_PAGE_NULL is returned.
1317 *
1318 * The object must be locked. No side effects.
1319 */
1320
1321 unsigned long vm_page_lookup_hint = 0;
1322 unsigned long vm_page_lookup_hint_next = 0;
1323 unsigned long vm_page_lookup_hint_prev = 0;
1324 unsigned long vm_page_lookup_hint_miss = 0;
1325 unsigned long vm_page_lookup_bucket_NULL = 0;
1326 unsigned long vm_page_lookup_miss = 0;
1327
1328
1329 vm_page_t
1330 vm_page_lookup(
1331 vm_object_t object,
1332 vm_object_offset_t offset)
1333 {
1334 vm_page_t mem;
1335 vm_page_bucket_t *bucket;
1336 queue_entry_t qe;
1337 lck_spin_t *bucket_lock;
1338 int hash_id;
1339
1340 vm_object_lock_assert_held(object);
1341 mem = object->memq_hint;
1342
1343 if (mem != VM_PAGE_NULL) {
1344 assert(mem->object == object);
1345
1346 if (mem->offset == offset) {
1347 vm_page_lookup_hint++;
1348 return mem;
1349 }
1350 qe = queue_next(&mem->listq);
1351
1352 if (! queue_end(&object->memq, qe)) {
1353 vm_page_t next_page;
1354
1355 next_page = (vm_page_t) qe;
1356 assert(next_page->object == object);
1357
1358 if (next_page->offset == offset) {
1359 vm_page_lookup_hint_next++;
1360 object->memq_hint = next_page; /* new hint */
1361 return next_page;
1362 }
1363 }
1364 qe = queue_prev(&mem->listq);
1365
1366 if (! queue_end(&object->memq, qe)) {
1367 vm_page_t prev_page;
1368
1369 prev_page = (vm_page_t) qe;
1370 assert(prev_page->object == object);
1371
1372 if (prev_page->offset == offset) {
1373 vm_page_lookup_hint_prev++;
1374 object->memq_hint = prev_page; /* new hint */
1375 return prev_page;
1376 }
1377 }
1378 }
1379 /*
1380 * Search the hash table for this object/offset pair
1381 */
1382 hash_id = vm_page_hash(object, offset);
1383 bucket = &vm_page_buckets[hash_id];
1384
1385 /*
1386 * since we hold the object lock, we are guaranteed that no
1387 * new pages can be inserted into this object... this in turn
1388 * guarantees that the page we're looking for can't exist
1389 * if the bucket it hashes to is currently NULL even when looked
1390 * at outside the scope of the hash bucket lock... this is a
1391 * really cheap optimization to avoid taking the lock
1392 */
1393 if (bucket->pages == VM_PAGE_NULL) {
1394 vm_page_lookup_bucket_NULL++;
1395
1396 return (VM_PAGE_NULL);
1397 }
1398 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1399
1400 lck_spin_lock(bucket_lock);
1401
1402 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1403 VM_PAGE_CHECK(mem);
1404 if ((mem->object == object) && (mem->offset == offset))
1405 break;
1406 }
1407 lck_spin_unlock(bucket_lock);
1408
1409 if (mem != VM_PAGE_NULL) {
1410 if (object->memq_hint != VM_PAGE_NULL) {
1411 vm_page_lookup_hint_miss++;
1412 }
1413 assert(mem->object == object);
1414 object->memq_hint = mem;
1415 } else
1416 vm_page_lookup_miss++;
1417
1418 return(mem);
1419 }
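
/*
 * Caller-side sketch: vm_page_lookup() requires the object lock and has no
 * side effects, so a simple residency check looks like the helper below.
 * This assumes the vm_object_lock()/vm_object_unlock() interfaces declared
 * elsewhere in the VM code; example_page_is_resident() is a hypothetical
 * name, and the answer is only stable while the object lock is held.
 */
#if 0	/* illustrative example only */
static boolean_t
example_page_is_resident(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_lookup(object, offset);
	vm_object_unlock(object);

	return (m != VM_PAGE_NULL);
}
#endif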
1420
1421
1422 /*
1423 * vm_page_rename:
1424 *
1425 * Move the given memory entry from its
1426 * current object to the specified target object/offset.
1427 *
1428 * The object must be locked.
1429 */
1430 void
1431 vm_page_rename(
1432 register vm_page_t mem,
1433 register vm_object_t new_object,
1434 vm_object_offset_t new_offset,
1435 boolean_t encrypted_ok)
1436 {
1437 assert(mem->object != new_object);
1438
1439 /*
1440 * ENCRYPTED SWAP:
1441 * The encryption key is based on the page's memory object
1442 * (aka "pager") and paging offset. Moving the page to
1443 * another VM object changes its "pager" and "paging_offset"
1444 * so it has to be decrypted first, or we would lose the key.
1445 *
1446 * One exception is VM object collapsing, where we transfer pages
1447 * from one backing object to its parent object. This operation also
1448 * transfers the paging information, so the <pager,paging_offset> info
1449 * should remain consistent. The caller (vm_object_do_collapse())
1450 * sets "encrypted_ok" in this case.
1451 */
1452 if (!encrypted_ok && mem->encrypted) {
1453 panic("vm_page_rename: page %p is encrypted\n", mem);
1454 }
1455
1456 XPR(XPR_VM_PAGE,
1457 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1458 new_object, new_offset,
1459 mem, 0,0);
1460
1461 /*
1462 * Changes to mem->object require the page lock because
1463 * the pageout daemon uses that lock to get the object.
1464 */
1465 vm_page_lockspin_queues();
1466
1467 vm_page_remove(mem, TRUE);
1468 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1469
1470 vm_page_unlock_queues();
1471 }
1472
1473 /*
1474 * vm_page_init:
1475 *
1476 * Initialize the fields in a new page.
1477 * This takes a structure with random values and initializes it
1478 * so that it can be given to vm_page_release or vm_page_insert.
1479 */
1480 void
1481 vm_page_init(
1482 vm_page_t mem,
1483 ppnum_t phys_page)
1484 {
1485 assert(phys_page);
1486 *mem = vm_page_template;
1487 mem->phys_page = phys_page;
1488 }
1489
1490 /*
1491 * vm_page_grab_fictitious:
1492 *
1493 * Remove a fictitious page from the free list.
1494 * Returns VM_PAGE_NULL if there are no free pages.
1495 */
1496 int c_vm_page_grab_fictitious = 0;
1497 int c_vm_page_release_fictitious = 0;
1498 int c_vm_page_more_fictitious = 0;
1499
1500 extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
1501
1502 vm_page_t
1503 vm_page_grab_fictitious_common(
1504 ppnum_t phys_addr)
1505 {
1506 register vm_page_t m;
1507
1508 m = (vm_page_t)zget(vm_page_zone);
1509 if (m) {
1510 vm_page_init(m, phys_addr);
1511 m->fictitious = TRUE;
1512 }
1513
1514 c_vm_page_grab_fictitious++;
1515 return m;
1516 }
1517
1518 vm_page_t
1519 vm_page_grab_fictitious(void)
1520 {
1521 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1522 }
1523
1524 vm_page_t
1525 vm_page_grab_guard(void)
1526 {
1527 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1528 }
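
/*
 * Caller-side sketch: the fictitious-page zone can be temporarily empty, so
 * callers loop, refilling it with vm_page_more_fictitious() (which may
 * block), just as vm_page_create() above does.
 * example_grab_fictitious_blocking() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_t
example_grab_fictitious_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
		vm_page_more_fictitious();
	return (m);
}
#endif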
1529
1530 /*
1531 * vm_page_release_fictitious:
1532 *
1533 * Release a fictitious page to the free list.
1534 */
1535
1536 void
1537 vm_page_release_fictitious(
1538 register vm_page_t m)
1539 {
1540 assert(!m->free);
1541 assert(m->busy);
1542 assert(m->fictitious);
1543 assert(m->phys_page == vm_page_fictitious_addr ||
1544 m->phys_page == vm_page_guard_addr);
1545
1546 c_vm_page_release_fictitious++;
1547 #if DEBUG
1548 if (m->free)
1549 panic("vm_page_release_fictitious");
1550 #endif
1551 m->free = TRUE;
1552 zfree(vm_page_zone, m);
1553 }
1554
1555 /*
1556 * vm_page_more_fictitious:
1557 *
1558 * Add more fictitious pages to the free list.
1559 * Allowed to block. This routine is way intimate
1560 * with the zones code, for several reasons:
1561 * 1. we need to carve some page structures out of physical
1562 * memory before zones work, so they _cannot_ come from
1563 * the zone_map.
1564 * 2. the zone needs to be collectable in order to prevent
1565 * growth without bound. These structures are used by
1566 * the device pager (by the hundreds and thousands), as
1567 * private pages for pageout, and as blocking pages for
1568 * pagein. Temporary bursts in demand should not result in
1569 * permanent allocation of a resource.
1570 * 3. To smooth allocation humps, we allocate single pages
1571 * with kernel_memory_allocate(), and cram them into the
1572 * zone. This also allows us to initialize the vm_page_t's
1573 * on the way into the zone, so that zget() always returns
1574 * an initialized structure. The zone free element pointer
1575 * and the free page pointer are both the first item in the
1576 * vm_page_t.
1577 * 4. By having the pages in the zone pre-initialized, we need
1578 * not keep 2 levels of lists. The garbage collector simply
1579 * scans our list, and reduces physical memory usage as it
1580 * sees fit.
1581 */
1582
1583 void vm_page_more_fictitious(void)
1584 {
1585 register vm_page_t m;
1586 vm_offset_t addr;
1587 kern_return_t retval;
1588 int i;
1589
1590 c_vm_page_more_fictitious++;
1591
1592 /*
1593 * Allocate a single page from the zone_map. Do not wait if no physical
1594 * pages are immediately available, and do not zero the space. We need
1595 * our own blocking lock here to prevent having multiple,
1596 * simultaneous requests from piling up on the zone_map lock. Exactly
1597 * one (of our) threads should be potentially waiting on the map lock.
1598 * If the winner is not vm-privileged, then the page allocation will fail,
1599 * and it will temporarily block here in the vm_page_wait().
1600 */
1601 lck_mtx_lock(&vm_page_alloc_lock);
1602 /*
1603 * If another thread allocated space, just bail out now.
1604 */
1605 if (zone_free_count(vm_page_zone) > 5) {
1606 /*
1607 * The number "5" is a small number that is larger than the
1608 * number of fictitious pages that any single caller will
1609 * attempt to allocate. Otherwise, a thread will attempt to
1610 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1611 * release all of the resources and locks already acquired,
1612 * and then call this routine. This routine finds the pages
1613 * that the caller released, so fails to allocate new space.
1614 * The process repeats infinitely. The largest known number
1615 * of fictitious pages required in this manner is 2. 5 is
1616 * simply a somewhat larger number.
1617 */
1618 lck_mtx_unlock(&vm_page_alloc_lock);
1619 return;
1620 }
1621
1622 retval = kernel_memory_allocate(zone_map,
1623 &addr, PAGE_SIZE, VM_PROT_ALL,
1624 KMA_KOBJECT|KMA_NOPAGEWAIT);
1625 if (retval != KERN_SUCCESS) {
1626 /*
1627 * No page was available. Tell the pageout daemon, drop the
1628 * lock to give another thread a chance at it, and
1629 * wait for the pageout daemon to make progress.
1630 */
1631 lck_mtx_unlock(&vm_page_alloc_lock);
1632 vm_page_wait(THREAD_UNINT);
1633 return;
1634 }
1635 /*
1636 * Initialize as many vm_page_t's as will fit on this page. This
1637 * depends on the zone code disturbing ONLY the first item of
1638 * each zone element.
1639 */
1640 m = (vm_page_t)addr;
1641 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1642 vm_page_init(m, vm_page_fictitious_addr);
1643 m->fictitious = TRUE;
1644 m++;
1645 }
1646 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1647 lck_mtx_unlock(&vm_page_alloc_lock);
1648 }
1649
1650
1651 /*
1652 * vm_pool_low():
1653 *
1654 * Return true if it is not likely that a non-vm_privileged thread
1655 * can get memory without blocking. Advisory only, since the
1656 * situation may change under us.
1657 */
1658 int
1659 vm_pool_low(void)
1660 {
1661 /* No locking, at worst we will fib. */
1662 return( vm_page_free_count <= vm_page_free_reserved );
1663 }
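
/*
 * Caller-side sketch: since vm_pool_low() is advisory and takes no locks, a
 * caller that must not block can use it as a cheap early-out before trying
 * to allocate.  example_try_grab() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_t
example_try_grab(void)
{
	if (vm_pool_low())
		return (VM_PAGE_NULL);	/* likely to fail or block; let the caller back off */

	return (vm_page_grab());
}
#endif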
1664
1665
1666
1667 /*
1668 * this is an interface to support bring-up of drivers
1669 * on platforms with physical memory > 4G...
1670 */
1671 int vm_himemory_mode = 0;
1672
1673
1674 /*
1675 * this interface exists to support hardware controllers
1676 * incapable of generating DMAs with more than 32 bits
1677 * of address on platforms with physical memory > 4G...
1678 */
1679 unsigned int vm_lopage_free_count = 0;
1680 unsigned int vm_lopage_max_count = 0;
1681 queue_head_t vm_lopage_queue_free;
1682
1683 vm_page_t
1684 vm_page_grablo(void)
1685 {
1686 register vm_page_t mem;
1687 unsigned int vm_lopage_alloc_count;
1688
1689 if (vm_lopage_poolsize == 0)
1690 return (vm_page_grab());
1691
1692 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1693
1694 if (! queue_empty(&vm_lopage_queue_free)) {
1695 queue_remove_first(&vm_lopage_queue_free,
1696 mem,
1697 vm_page_t,
1698 pageq);
1699 assert(mem->free);
1700 assert(mem->busy);
1701 assert(!mem->pmapped);
1702 assert(!mem->wpmapped);
1703
1704 mem->pageq.next = NULL;
1705 mem->pageq.prev = NULL;
1706 mem->free = FALSE;
1707
1708 vm_lopage_free_count--;
1709 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1710 if (vm_lopage_alloc_count > vm_lopage_max_count)
1711 vm_lopage_max_count = vm_lopage_alloc_count;
1712 } else {
1713 mem = VM_PAGE_NULL;
1714 }
1715 lck_mtx_unlock(&vm_page_queue_free_lock);
1716
1717 return (mem);
1718 }
1719
1720
1721 /*
1722 * vm_page_grab:
1723 *
1724 * first try to grab a page from the per-cpu free list...
1725 * this must be done while pre-emption is disabled... if
1726 * a page is available, we're done...
1727 * if no page is available, grab the vm_page_queue_free_lock
1728 * and see if current number of free pages would allow us
1729 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1730 * if there are pages available, disable preemption and
1731 * recheck the state of the per-cpu free list... we could
1732 * have been preempted and moved to a different cpu, or
1733 * some other thread could have re-filled it... if still
1734 * empty, figure out how many pages we can steal from the
1735 * global free queue and move to the per-cpu queue...
1736 * return one of these pages when done... only wake up the
1737 * pageout_scan thread if we moved pages from the global
1738 * list... no need for the wakeup if we've satisfied the
1739 * request from the per-cpu queue.
1740 */
1741
1742 #define COLOR_GROUPS_TO_STEAL 4
1743
1744
1745 vm_page_t
1746 vm_page_grab( void )
1747 {
1748 vm_page_t mem;
1749
1750
1751 disable_preemption();
1752
1753 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1754 return_page_from_cpu_list:
1755 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1756 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1757 mem->pageq.next = NULL;
1758
1759 enable_preemption();
1760
1761 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1762 assert(mem->tabled == FALSE);
1763 assert(mem->object == VM_OBJECT_NULL);
1764 assert(!mem->laundry);
1765 assert(!mem->free);
1766 assert(pmap_verify_free(mem->phys_page));
1767 assert(mem->busy);
1768 assert(!mem->encrypted);
1769 assert(!mem->pmapped);
1770 assert(!mem->wpmapped);
1771
1772 return mem;
1773 }
1774 enable_preemption();
1775
1776
1777 /*
1778 * Optionally produce warnings if the wire or gobble
1779 * counts exceed some threshold.
1780 */
1781 if (vm_page_wire_count_warning > 0
1782 && vm_page_wire_count >= vm_page_wire_count_warning) {
1783 printf("mk: vm_page_grab(): high wired page count of %d\n",
1784 vm_page_wire_count);
1785 assert(vm_page_wire_count < vm_page_wire_count_warning);
1786 }
1787 if (vm_page_gobble_count_warning > 0
1788 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1789 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1790 vm_page_gobble_count);
1791 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1792 }
1793
1794 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1795
1796 /*
1797 * Only let privileged threads (involved in pageout)
1798 * dip into the reserved pool.
1799 */
1800 if ((vm_page_free_count < vm_page_free_reserved) &&
1801 !(current_thread()->options & TH_OPT_VMPRIV)) {
1802 lck_mtx_unlock(&vm_page_queue_free_lock);
1803 mem = VM_PAGE_NULL;
1804 }
1805 else {
1806 vm_page_t head;
1807 vm_page_t tail;
1808 unsigned int pages_to_steal;
1809 unsigned int color;
1810
1811 while ( vm_page_free_count == 0 ) {
1812
1813 lck_mtx_unlock(&vm_page_queue_free_lock);
1814 /*
1815 * must be a privileged thread to be
1816 * in this state since a non-privileged
1817 * thread would have bailed if we were
1818 * under the vm_page_free_reserved mark
1819 */
1820 VM_PAGE_WAIT();
1821 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1822 }
1823
1824 disable_preemption();
1825
1826 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1827 lck_mtx_unlock(&vm_page_queue_free_lock);
1828
1829 /*
1830 * we got preempted and moved to another processor
1831 * or we got preempted and someone else ran and filled the cache
1832 */
1833 goto return_page_from_cpu_list;
1834 }
1835 if (vm_page_free_count <= vm_page_free_reserved)
1836 pages_to_steal = 1;
1837 else {
1838 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1839
1840 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1841 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1842 }
1843 color = PROCESSOR_DATA(current_processor(), start_color);
1844 head = tail = NULL;
1845
1846 while (pages_to_steal--) {
1847 if (--vm_page_free_count < vm_page_free_count_minimum)
1848 vm_page_free_count_minimum = vm_page_free_count;
1849
1850 while (queue_empty(&vm_page_queue_free[color]))
1851 color = (color + 1) & vm_color_mask;
1852
1853 queue_remove_first(&vm_page_queue_free[color],
1854 mem,
1855 vm_page_t,
1856 pageq);
1857 mem->pageq.next = NULL;
1858 mem->pageq.prev = NULL;
1859
1860 color = (color + 1) & vm_color_mask;
1861
1862 if (head == NULL)
1863 head = mem;
1864 else
1865 tail->pageq.next = (queue_t)mem;
1866 tail = mem;
1867
1868 mem->pageq.prev = NULL;
1869 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1870 assert(mem->tabled == FALSE);
1871 assert(mem->object == VM_OBJECT_NULL);
1872 assert(!mem->laundry);
1873 assert(mem->free);
1874 mem->free = FALSE;
1875
1876 assert(pmap_verify_free(mem->phys_page));
1877 assert(mem->busy);
1878 assert(!mem->free);
1879 assert(!mem->encrypted);
1880 assert(!mem->pmapped);
1881 assert(!mem->wpmapped);
1882 }
1883 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1884 PROCESSOR_DATA(current_processor(), start_color) = color;
1885
1886 /*
1887 * satisfy this request
1888 */
1889 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1890 mem = head;
1891 mem->pageq.next = NULL;
1892
1893 lck_mtx_unlock(&vm_page_queue_free_lock);
1894
1895 enable_preemption();
1896 }
1897 /*
1898 * Decide if we should poke the pageout daemon.
1899 * We do this if the free count is less than the low
1900 * water mark, or if the free count is less than the high
1901 * water mark (but above the low water mark) and the inactive
1902 * count is less than its target.
1903 *
1904 * We don't have the counts locked ... if they change a little,
1905 * it doesn't really matter.
1906 */
1907 if ((vm_page_free_count < vm_page_free_min) ||
1908 ((vm_page_free_count < vm_page_free_target) &&
1909 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1910 thread_wakeup((event_t) &vm_page_free_wanted);
1911
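/*
 * percent_avail below approximates the percentage of physical memory
 * that is free or reclaimable:
 *
 *   percent_avail = 100 * (active + inactive + speculative + free
 *                          [+ purgeable, when no default pager is set])
 *                   / atop_64(max_mem)
 *
 * The notification thread is only poked when the value has moved at
 * least 5 points from the last recorded kern_memorystatus_level (here,
 * downward); the sites further down check the upward direction.
 */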
1912 #if CONFIG_EMBEDDED
1913 {
1914 int percent_avail;
1915
1916 /*
1917 * Decide if we need to poke the memorystatus notification thread.
1918 */
1919 percent_avail =
1920 (vm_page_active_count + vm_page_inactive_count +
1921 vm_page_speculative_count + vm_page_free_count +
1922 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1923 atop_64(max_mem);
1924 if (percent_avail <= (kern_memorystatus_level - 5)) {
1925 kern_memorystatus_level = percent_avail;
1926 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1927 }
1928 }
1929 #endif
1930
1931 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1932
1933 return mem;
1934 }
1935
1936 /*
1937 * vm_page_release:
1938 *
1939 * Return a page to the free list.
1940 */
1941
1942 void
1943 vm_page_release(
1944 register vm_page_t mem)
1945 {
1946 unsigned int color;
1947 int need_wakeup = 0;
1948 int need_priv_wakeup = 0;
1949 #if 0
1950 unsigned int pindex;
1951 phys_entry *physent;
1952
1953 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1954 if(physent->ppLink & ppN) { /* (BRINGUP) */
1955 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1956 }
1957 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1958 #endif
1959 assert(!mem->private && !mem->fictitious);
1960 if (vm_page_free_verify) {
1961 assert(pmap_verify_free(mem->phys_page));
1962 }
1963 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1964
1965
1966 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1967 #if DEBUG
1968 if (mem->free)
1969 panic("vm_page_release");
1970 #endif
1971 mem->free = TRUE;
1972
1973 assert(mem->busy);
1974 assert(!mem->laundry);
1975 assert(mem->object == VM_OBJECT_NULL);
1976 assert(mem->pageq.next == NULL &&
1977 mem->pageq.prev == NULL);
1978 assert(mem->listq.next == NULL &&
1979 mem->listq.prev == NULL);
1980
1981 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1982 /*
1983 * this exists to support hardware controllers
1984 * incapable of generating DMAs with more than 32 bits
1985 * of address on platforms with physical memory > 4G...
1986 */
1987 queue_enter_first(&vm_lopage_queue_free,
1988 mem,
1989 vm_page_t,
1990 pageq);
1991 vm_lopage_free_count++;
1992 } else {
1993 color = mem->phys_page & vm_color_mask;
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
1998 vm_page_free_count++;
1999 /*
2000 * Check if we should wake up someone waiting for a page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wake up only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
2005 * if we wake up all waiting threads, one greedy thread
2006 * can starve multiple niceguy threads. When the threads
2007 * all wake up, the greedy thread runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
2018
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2021 vm_page_free_wanted_privileged--;
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
2025 vm_page_free_wanted--;
2026 need_wakeup = 1;
2027 }
2028 }
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2035
2036 #if CONFIG_EMBEDDED
2037 {
2038 int percent_avail;
2039
2040 /*
2041 * Decide if we need to poke the memorystatus notification thread.
2042 * Locking is not a big issue, as only a single thread delivers these.
2043 */
2044 percent_avail =
2045 (vm_page_active_count + vm_page_inactive_count +
2046 vm_page_speculative_count + vm_page_free_count +
2047 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2048 atop_64(max_mem);
2049 if (percent_avail >= (kern_memorystatus_level + 5)) {
2050 kern_memorystatus_level = percent_avail;
2051 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2052 }
2053 }
2054 #endif
2055 }
2056
2057 /*
2058 * vm_page_wait:
2059 *
2060 * Wait for a page to become available.
2061 * If there are plenty of free pages, then we don't sleep.
2062 *
2063 * Returns:
2064 * TRUE: There may be another page, try again
2065 * FALSE: We were interrupted out of our wait, don't try again
2066 */
2067
2068 boolean_t
2069 vm_page_wait(
2070 int interruptible )
2071 {
2072 /*
2073 * We can't use vm_page_free_reserved to make this
2074 * determination. Consider: some thread might
2075 * need to allocate two pages. The first allocation
2076 * succeeds, the second fails. After the first page is freed,
2077 * a call to vm_page_wait must really block.
2078 */
2079 kern_return_t wait_result;
2080 int need_wakeup = 0;
2081 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2082
2083 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2084
2085 if (is_privileged && vm_page_free_count) {
2086 lck_mtx_unlock(&vm_page_queue_free_lock);
2087 return TRUE;
2088 }
2089 if (vm_page_free_count < vm_page_free_target) {
2090
2091 if (is_privileged) {
2092 if (vm_page_free_wanted_privileged++ == 0)
2093 need_wakeup = 1;
2094 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2095 } else {
2096 if (vm_page_free_wanted++ == 0)
2097 need_wakeup = 1;
2098 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2099 }
2100 lck_mtx_unlock(&vm_page_queue_free_lock);
2101 counter(c_vm_page_wait_block++);
2102
2103 if (need_wakeup)
2104 thread_wakeup((event_t)&vm_page_free_wanted);
2105
2106 if (wait_result == THREAD_WAITING)
2107 wait_result = thread_block(THREAD_CONTINUE_NULL);
2108
2109 return(wait_result == THREAD_AWAKENED);
2110 } else {
2111 lck_mtx_unlock(&vm_page_queue_free_lock);
2112 return TRUE;
2113 }
2114 }
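/*
 * Illustrative caller pattern (a sketch, not code from this file): a
 * caller that must have a page typically loops over vm_page_grab() and
 * vm_page_wait(), much as vm_page_part_zero_fill() does further down;
 * 'interruptible' here stands for whatever wait mode the caller wants:
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		if (!vm_page_wait(interruptible))
 *			break;		/* interrupted out of the wait */
 *	}
 */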
2115
2116 /*
2117 * vm_page_alloc:
2118 *
2119 * Allocate and return a memory cell associated
2120 * with this VM object/offset pair.
2121 *
2122 * Object must be locked.
2123 */
2124
2125 vm_page_t
2126 vm_page_alloc(
2127 vm_object_t object,
2128 vm_object_offset_t offset)
2129 {
2130 register vm_page_t mem;
2131
2132 vm_object_lock_assert_exclusive(object);
2133 mem = vm_page_grab();
2134 if (mem == VM_PAGE_NULL)
2135 return VM_PAGE_NULL;
2136
2137 vm_page_insert(mem, object, offset);
2138
2139 return(mem);
2140 }
2141
2142 vm_page_t
2143 vm_page_alloclo(
2144 vm_object_t object,
2145 vm_object_offset_t offset)
2146 {
2147 register vm_page_t mem;
2148
2149 vm_object_lock_assert_exclusive(object);
2150 mem = vm_page_grablo();
2151 if (mem == VM_PAGE_NULL)
2152 return VM_PAGE_NULL;
2153
2154 vm_page_insert(mem, object, offset);
2155
2156 return(mem);
2157 }
2158
2159
2160 /*
2161 * vm_page_alloc_guard:
2162 *
2163 * Allocate a fictitious page which will be used
2164 * as a guard page. The page will be inserted into
2165 * the object and returned to the caller.
2166 */
2167
2168 vm_page_t
2169 vm_page_alloc_guard(
2170 vm_object_t object,
2171 vm_object_offset_t offset)
2172 {
2173 register vm_page_t mem;
2174
2175 vm_object_lock_assert_exclusive(object);
2176 mem = vm_page_grab_guard();
2177 if (mem == VM_PAGE_NULL)
2178 return VM_PAGE_NULL;
2179
2180 vm_page_insert(mem, object, offset);
2181
2182 return(mem);
2183 }
2184
2185
2186 counter(unsigned int c_laundry_pages_freed = 0;)
2187
2188 /*
2189 * vm_page_free:
2190 *
2191 * Returns the given page to the free list,
2192 * disassociating it from any VM object.
2193 *
2194 * Object and page queues must be locked prior to entry.
2195 */
2196 static void
2197 vm_page_free_prepare(
2198 register vm_page_t mem)
2199 {
2200 vm_page_free_prepare_queues(mem);
2201 vm_page_free_prepare_object(mem, TRUE);
2202 }
2203
2204
2205 void
2206 vm_page_free_prepare_queues(
2207 vm_page_t mem)
2208 {
2209 VM_PAGE_CHECK(mem);
2210 assert(!mem->free);
2211 assert(!mem->cleaning);
2212 assert(!mem->pageout);
2213 #if DEBUG
2214 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2215 if (mem->free)
2216 panic("vm_page_free: freeing page on free list\n");
2217 #endif
2218 if (mem->object) {
2219 vm_object_lock_assert_exclusive(mem->object);
2220 }
2221
2222 if (mem->laundry) {
2223 /*
2224 * We may have to free a page while it's being laundered
2225 * if we lost its pager (due to a forced unmount, for example).
2226 * We need to call vm_pageout_throttle_up() before removing
2227 * the page from its VM object, so that we can find out on
2228 * which pageout queue the page is on.
2229 */
2230 vm_pageout_throttle_up(mem);
2231 counter(++c_laundry_pages_freed);
2232 }
2233 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2234
2235 if (VM_PAGE_WIRED(mem)) {
2236 if (mem->object) {
2237 assert(mem->object->wired_page_count > 0);
2238 mem->object->wired_page_count--;
2239 assert(mem->object->resident_page_count >=
2240 mem->object->wired_page_count);
2241 }
2242 if (!mem->private && !mem->fictitious)
2243 vm_page_wire_count--;
2244 mem->wire_count = 0;
2245 assert(!mem->gobbled);
2246 } else if (mem->gobbled) {
2247 if (!mem->private && !mem->fictitious)
2248 vm_page_wire_count--;
2249 vm_page_gobble_count--;
2250 }
2251 }
2252
2253
2254 void
2255 vm_page_free_prepare_object(
2256 vm_page_t mem,
2257 boolean_t remove_from_hash)
2258 {
2259 if (mem->object) {
2260 vm_object_lock_assert_exclusive(mem->object);
2261 }
2262
2263 if (mem->tabled)
2264 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2265
2266 PAGE_WAKEUP(mem); /* clears wanted */
2267
2268 if (mem->private) {
2269 mem->private = FALSE;
2270 mem->fictitious = TRUE;
2271 mem->phys_page = vm_page_fictitious_addr;
2272 }
2273 if (mem->fictitious) {
2274 /* Some of these may be unnecessary */
2275 mem->gobbled = FALSE;
2276 mem->busy = TRUE;
2277 mem->absent = FALSE;
2278 mem->error = FALSE;
2279 mem->dirty = FALSE;
2280 mem->precious = FALSE;
2281 mem->reference = FALSE;
2282 mem->encrypted = FALSE;
2283 mem->encrypted_cleaning = FALSE;
2284 mem->pmapped = FALSE;
2285 mem->wpmapped = FALSE;
2286 mem->reusable = FALSE;
2287 } else {
2288 if (mem->zero_fill == TRUE)
2289 VM_ZF_COUNT_DECR();
2290 vm_page_init(mem, mem->phys_page);
2291 }
2292 }
2293
2294
2295 void
2296 vm_page_free(
2297 vm_page_t mem)
2298 {
2299 vm_page_free_prepare(mem);
2300 if (mem->fictitious) {
2301 vm_page_release_fictitious(mem);
2302 } else {
2303 vm_page_release(mem);
2304 }
2305 }
2306
2307
2308 void
2309 vm_page_free_unlocked(
2310 vm_page_t mem,
2311 boolean_t remove_from_hash)
2312 {
2313 vm_page_lockspin_queues();
2314 vm_page_free_prepare_queues(mem);
2315 vm_page_unlock_queues();
2316
2317 vm_page_free_prepare_object(mem, remove_from_hash);
2318
2319 if (mem->fictitious) {
2320 vm_page_release_fictitious(mem);
2321 } else {
2322 vm_page_release(mem);
2323 }
2324 }
2325
2326 /*
2327 * Free a list of pages. The list can be up to several hundred pages,
2328 * as blocked up by vm_pageout_scan().
2329 * The big win is not having to take the free list lock once
2330 * per page. We sort the incoming pages into n lists, one for
2331 * each color.
2332 */
2333 void
2334 vm_page_free_list(
2335 vm_page_t mem,
2336 boolean_t prepare_object)
2337 {
2338 vm_page_t nxt;
2339 int pg_count = 0;
2340 int color;
2341 int inuse_list_head = -1;
2342
2343 queue_head_t free_list[MAX_COLORS];
2344 int inuse[MAX_COLORS];
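/*
 * inuse[]/inuse_list_head form a simple singly-linked list of the
 * colors that actually receive pages: inuse_list_head is the first such
 * color and inuse[color] is the next one (-1 terminates the list), so
 * the flush pass below only visits the non-empty local free lists
 * instead of scanning all MAX_COLORS of them.
 */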
2345
2346 for (color = 0; color < (signed) vm_colors; color++) {
2347 queue_init(&free_list[color]);
2348 }
2349
2350 while (mem) {
2351 assert(!mem->inactive);
2352 assert(!mem->active);
2353 assert(!mem->throttled);
2354 assert(!mem->free);
2355 assert(!mem->speculative);
2356 assert(mem->pageq.prev == NULL);
2357
2358 nxt = (vm_page_t)(mem->pageq.next);
2359
2360 if (prepare_object == TRUE)
2361 vm_page_free_prepare_object(mem, TRUE);
2362
2363 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2364 assert(pmap_verify_free(mem->phys_page));
2365 }
2366 assert(mem->busy);
2367
2368 if (!mem->fictitious) {
2369 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
2370 mem->pageq.next = NULL;
2371 vm_page_release(mem);
2372 } else {
2373
2374 /*
2375 * IMPORTANT: we can't set the page "free" here
2376 * because that would make the page eligible for
2377 * a physically-contiguous allocation (see
2378 * vm_page_find_contiguous()) right away (we don't
2379 * hold the vm_page_queue_free lock). That would
2380 * cause trouble because the page is not actually
2381 * in the free queue yet...
2382 */
2383 color = mem->phys_page & vm_color_mask;
2384 if (queue_empty(&free_list[color])) {
2385 inuse[color] = inuse_list_head;
2386 inuse_list_head = color;
2387 }
2388 queue_enter_first(&free_list[color],
2389 mem,
2390 vm_page_t,
2391 pageq);
2392 pg_count++;
2393 }
2394 } else {
2395 assert(mem->phys_page == vm_page_fictitious_addr ||
2396 mem->phys_page == vm_page_guard_addr);
2397 vm_page_release_fictitious(mem);
2398 }
2399 mem = nxt;
2400 }
2401 if (pg_count) {
2402 unsigned int avail_free_count;
2403 unsigned int need_wakeup = 0;
2404 unsigned int need_priv_wakeup = 0;
2405
2406 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2407
2408 color = inuse_list_head;
2409
2410 while( color != -1 ) {
2411 vm_page_t first, last;
2412 vm_page_t first_free;
2413
2414 /*
2415 * Now that we hold the vm_page_queue_free lock,
2416 * it's safe to mark all pages in our local queue
2417 * as "free"...
2418 */
2419 queue_iterate(&free_list[color],
2420 mem,
2421 vm_page_t,
2422 pageq) {
2423 assert(!mem->free);
2424 assert(mem->busy);
2425 mem->free = TRUE;
2426 }
2427
2428 /*
2429 * ... and insert our local queue at the head of
2430 * the global free queue.
2431 */
2432 first = (vm_page_t) queue_first(&free_list[color]);
2433 last = (vm_page_t) queue_last(&free_list[color]);
2434 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2435 if (queue_empty(&vm_page_queue_free[color])) {
2436 queue_last(&vm_page_queue_free[color]) =
2437 (queue_entry_t) last;
2438 } else {
2439 queue_prev(&first_free->pageq) =
2440 (queue_entry_t) last;
2441 }
2442 queue_first(&vm_page_queue_free[color]) =
2443 (queue_entry_t) first;
2444 queue_prev(&first->pageq) =
2445 (queue_entry_t) &vm_page_queue_free[color];
2446 queue_next(&last->pageq) =
2447 (queue_entry_t) first_free;
2448
2449 /* next color */
2450 color = inuse[color];
2451 }
2452
2453 vm_page_free_count += pg_count;
2454 avail_free_count = vm_page_free_count;
2455
2456 if (vm_page_free_wanted_privileged > 0 &&
2457 avail_free_count > 0) {
2458 if (avail_free_count < vm_page_free_wanted_privileged) {
2459 need_priv_wakeup = avail_free_count;
2460 vm_page_free_wanted_privileged -=
2461 avail_free_count;
2462 avail_free_count = 0;
2463 } else {
2464 need_priv_wakeup = vm_page_free_wanted_privileged;
2465 vm_page_free_wanted_privileged = 0;
2466 avail_free_count -=
2467 need_priv_wakeup;
2468 }
2469 }
2470
2471 if (vm_page_free_wanted > 0 &&
2472 avail_free_count > vm_page_free_reserved) {
2473 unsigned int available_pages;
2474
2475 available_pages = (avail_free_count -
2476 vm_page_free_reserved);
2477
2478 if (available_pages >= vm_page_free_wanted) {
2479 need_wakeup = vm_page_free_wanted;
2480 vm_page_free_wanted = 0;
2481 } else {
2482 need_wakeup = available_pages;
2483 vm_page_free_wanted -= available_pages;
2484 }
2485 }
2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2487
2488 if (need_priv_wakeup != 0) {
2489 /*
2490 * There shouldn't be that many VM-privileged threads,
2491 * so let's wake them all up, even if we don't quite
2492 * have enough pages to satisfy them all.
2493 */
2494 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2495 }
2496 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2497 /*
2498 * We don't expect to have any more waiters
2499 * after this, so let's wake them all up at
2500 * once.
2501 */
2502 thread_wakeup((event_t) &vm_page_free_count);
2503 } else for (; need_wakeup != 0; need_wakeup--) {
2504 /*
2505 * Wake up one waiter per page we just released.
2506 */
2507 thread_wakeup_one((event_t) &vm_page_free_count);
2508 }
2509 #if CONFIG_EMBEDDED
2510 {
2511 int percent_avail;
2512
2513 /*
2514 * Decide if we need to poke the memorystatus notification thread.
2515 */
2516 percent_avail =
2517 (vm_page_active_count + vm_page_inactive_count +
2518 vm_page_speculative_count + vm_page_free_count +
2519 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2520 atop_64(max_mem);
2521 if (percent_avail >= (kern_memorystatus_level + 5)) {
2522 kern_memorystatus_level = percent_avail;
2523 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2524 }
2525 }
2526 #endif
2527 }
2528 }
2529
2530
2531 /*
2532 * vm_page_wire:
2533 *
2534 * Mark this page as wired down by yet
2535 * another map, removing it from paging queues
2536 * as necessary.
2537 *
2538 * The page's object and the page queues must be locked.
2539 */
2540 void
2541 vm_page_wire(
2542 register vm_page_t mem)
2543 {
2544
2545 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2546
2547 VM_PAGE_CHECK(mem);
2548 if (mem->object) {
2549 vm_object_lock_assert_exclusive(mem->object);
2550 } else {
2551 /*
2552 * In theory, the page should be in an object before it
2553 * gets wired, since we need to hold the object lock
2554 * to update some fields in the page structure.
2555 * However, some code (i386 pmap, for example) might want
2556 * to wire a page before it gets inserted into an object.
2557 * That's somewhat OK, as long as nobody else can get to
2558 * that page and update it at the same time.
2559 */
2560 }
2561 #if DEBUG
2562 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2563 #endif
2564 if ( !VM_PAGE_WIRED(mem)) {
2565 VM_PAGE_QUEUES_REMOVE(mem);
2566
2567 if (mem->object) {
2568 mem->object->wired_page_count++;
2569 assert(mem->object->resident_page_count >=
2570 mem->object->wired_page_count);
2571 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2572 assert(vm_page_purgeable_count > 0);
2573 OSAddAtomic(-1, &vm_page_purgeable_count);
2574 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2575 }
2576 if (mem->object->all_reusable) {
2577 /*
2578 * Wired pages are not counted as "re-usable"
2579 * in "all_reusable" VM objects, so nothing
2580 * to do here.
2581 */
2582 } else if (mem->reusable) {
2583 /*
2584 * This page is not "re-usable" when it's
2585 * wired, so adjust its state and the
2586 * accounting.
2587 */
2588 vm_object_reuse_pages(mem->object,
2589 mem->offset,
2590 mem->offset+PAGE_SIZE_64,
2591 FALSE);
2592 }
2593 }
2594 assert(!mem->reusable);
2595
2596 if (!mem->private && !mem->fictitious && !mem->gobbled)
2597 vm_page_wire_count++;
2598 if (mem->gobbled)
2599 vm_page_gobble_count--;
2600 mem->gobbled = FALSE;
2601 if (mem->zero_fill == TRUE) {
2602 mem->zero_fill = FALSE;
2603 VM_ZF_COUNT_DECR();
2604 }
2605 #if CONFIG_EMBEDDED
2606 {
2607 int percent_avail;
2608
2609 /*
2610 * Decide if we need to poke the memorystatus notification thread.
2611 */
2612 percent_avail =
2613 (vm_page_active_count + vm_page_inactive_count +
2614 vm_page_speculative_count + vm_page_free_count +
2615 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2616 atop_64(max_mem);
2617 if (percent_avail <= (kern_memorystatus_level - 5)) {
2618 kern_memorystatus_level = percent_avail;
2619 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2620 }
2621 }
2622 #endif
2623 /*
2624 * ENCRYPTED SWAP:
2625 * The page could be encrypted, but
2626 * We don't have to decrypt it here
2627 * because we don't guarantee that the
2628 * data is actually valid at this point.
2629 * The page will get decrypted in
2630 * vm_fault_wire() if needed.
2631 */
2632 }
2633 assert(!mem->gobbled);
2634 mem->wire_count++;
2635 VM_PAGE_CHECK(mem);
2636 }
2637
2638 /*
2639 * vm_page_gobble:
2640 *
2641 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2642 *
2643 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2644 */
2645 void
2646 vm_page_gobble(
2647 register vm_page_t mem)
2648 {
2649 vm_page_lockspin_queues();
2650 VM_PAGE_CHECK(mem);
2651
2652 assert(!mem->gobbled);
2653 assert( !VM_PAGE_WIRED(mem));
2654
2655 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2656 if (!mem->private && !mem->fictitious)
2657 vm_page_wire_count++;
2658 }
2659 vm_page_gobble_count++;
2660 mem->gobbled = TRUE;
2661 vm_page_unlock_queues();
2662 }
2663
2664 /*
2665 * vm_page_unwire:
2666 *
2667 * Release one wiring of this page, potentially
2668 * enabling it to be paged again.
2669 *
2670 * The page's object and the page queues must be locked.
2671 */
2672 void
2673 vm_page_unwire(
2674 register vm_page_t mem)
2675 {
2676
2677 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2678
2679 VM_PAGE_CHECK(mem);
2680 assert(VM_PAGE_WIRED(mem));
2681 assert(mem->object != VM_OBJECT_NULL);
2682 #if DEBUG
2683 vm_object_lock_assert_exclusive(mem->object);
2684 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2685 #endif
2686 if (--mem->wire_count == 0) {
2687 assert(!mem->private && !mem->fictitious);
2688 vm_page_wire_count--;
2689 assert(mem->object->wired_page_count > 0);
2690 mem->object->wired_page_count--;
2691 assert(mem->object->resident_page_count >=
2692 mem->object->wired_page_count);
2693 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2694 OSAddAtomic(+1, &vm_page_purgeable_count);
2695 assert(vm_page_purgeable_wired_count > 0);
2696 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2697 }
2698 assert(!mem->laundry);
2699 assert(mem->object != kernel_object);
2700 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2701 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2702 vm_page_deactivate(mem);
2703 } else {
2704 vm_page_activate(mem);
2705 }
2706 #if CONFIG_EMBEDDED
2707 {
2708 int percent_avail;
2709
2710 /*
2711 * Decide if we need to poke the memorystatus notification thread.
2712 */
2713 percent_avail =
2714 (vm_page_active_count + vm_page_inactive_count +
2715 vm_page_speculative_count + vm_page_free_count +
2716 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2717 atop_64(max_mem);
2718 if (percent_avail >= (kern_memorystatus_level + 5)) {
2719 kern_memorystatus_level = percent_avail;
2720 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2721 }
2722 }
2723 #endif
2724 }
2725 VM_PAGE_CHECK(mem);
2726 }
2727
2728 /*
2729 * vm_page_deactivate:
2730 *
2731 * Returns the given page to the inactive list,
2732 * indicating that no physical maps have access
2733 * to this page. [Used by the physical mapping system.]
2734 *
2735 * The page queues must be locked.
2736 */
2737 void
2738 vm_page_deactivate(
2739 vm_page_t m)
2740 {
2741 vm_page_deactivate_internal(m, TRUE);
2742 }
2743
2744
2745 void
2746 vm_page_deactivate_internal(
2747 vm_page_t m,
2748 boolean_t clear_hw_reference)
2749 {
2750
2751 VM_PAGE_CHECK(m);
2752 assert(m->object != kernel_object);
2753 assert(m->phys_page != vm_page_guard_addr);
2754
2755 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2756 #if DEBUG
2757 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2758 #endif
2759 /*
2760 * This page is no longer very interesting. If it was
2761 * interesting (active or inactive/referenced), then we
2762 * clear the reference bit and (re)enter it in the
2763 * inactive queue. Note wired pages should not have
2764 * their reference bit cleared.
2765 */
2766 if (m->gobbled) { /* can this happen? */
2767 assert( !VM_PAGE_WIRED(m));
2768
2769 if (!m->private && !m->fictitious)
2770 vm_page_wire_count--;
2771 vm_page_gobble_count--;
2772 m->gobbled = FALSE;
2773 }
2774 if (m->private || (VM_PAGE_WIRED(m)))
2775 return;
2776
2777 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2778 pmap_clear_reference(m->phys_page);
2779
2780 m->reference = FALSE;
2781 m->no_cache = FALSE;
2782
2783 if (!m->inactive) {
2784 VM_PAGE_QUEUES_REMOVE(m);
2785
2786 assert(!m->laundry);
2787 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2788
2789 if (!IP_VALID(memory_manager_default) &&
2790 m->dirty && m->object->internal &&
2791 (m->object->purgable == VM_PURGABLE_DENY ||
2792 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2793 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2794 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2795 m->throttled = TRUE;
2796 vm_page_throttled_count++;
2797 } else {
2798 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2799 vm_page_speculate(m, FALSE);
2800 #if DEVELOPMENT || DEBUG
2801 vm_page_speculative_recreated++;
2802 #endif
2803 return;
2804 } else {
2805 if (m->zero_fill) {
2806 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2807 vm_zf_queue_count++;
2808 } else {
2809 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2810 }
2811 }
2812 m->inactive = TRUE;
2813 if (!m->fictitious) {
2814 vm_page_inactive_count++;
2815 token_new_pagecount++;
2816 }
2817 }
2818 }
2819 }
2820
2821 /*
2822 * vm_page_activate:
2823 *
2824 * Put the specified page on the active list (if appropriate).
2825 *
2826 * The page queues must be locked.
2827 */
2828
2829 void
2830 vm_page_activate(
2831 register vm_page_t m)
2832 {
2833 VM_PAGE_CHECK(m);
2834 #ifdef FIXME_4778297
2835 assert(m->object != kernel_object);
2836 #endif
2837 assert(m->phys_page != vm_page_guard_addr);
2838 #if DEBUG
2839 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2840 #endif
2841 if (m->gobbled) {
2842 assert( !VM_PAGE_WIRED(m));
2843 if (!m->private && !m->fictitious)
2844 vm_page_wire_count--;
2845 vm_page_gobble_count--;
2846 m->gobbled = FALSE;
2847 }
2848 if (m->private)
2849 return;
2850
2851 #if DEBUG
2852 if (m->active)
2853 panic("vm_page_activate: already active");
2854 #endif
2855
2856 if (m->speculative) {
2857 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2858 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2859 }
2860
2861 VM_PAGE_QUEUES_REMOVE(m);
2862
2863 if ( !VM_PAGE_WIRED(m)) {
2864 assert(!m->laundry);
2865 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2866 if (!IP_VALID(memory_manager_default) &&
2867 !m->fictitious && m->dirty && m->object->internal &&
2868 (m->object->purgable == VM_PURGABLE_DENY ||
2869 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2870 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2871 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2872 m->throttled = TRUE;
2873 vm_page_throttled_count++;
2874 } else {
2875 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2876 m->active = TRUE;
2877 if (!m->fictitious)
2878 vm_page_active_count++;
2879 }
2880 m->reference = TRUE;
2881 m->no_cache = FALSE;
2882 }
2883 VM_PAGE_CHECK(m);
2884 }
2885
2886
2887 /*
2888 * vm_page_speculate:
2889 *
2890 * Put the specified page on the speculative list (if appropriate).
2891 *
2892 * The page queues must be locked.
2893 */
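/*
 * Speculative pages are binned by age: each bin in
 * vm_page_queue_speculative[] collects the pages queued during one
 * VM_PAGE_SPECULATIVE_Q_AGE_MS window, with age_ts recording when that
 * window closes.  When the current window expires we advance
 * speculative_age_index, and if the bin about to be reused still holds
 * pages, vm_page_speculate_ageit() folds them into the AGED queue that
 * vm_pageout_scan reclaims from.
 */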
2894 void
2895 vm_page_speculate(
2896 vm_page_t m,
2897 boolean_t new)
2898 {
2899 struct vm_speculative_age_q *aq;
2900
2901 VM_PAGE_CHECK(m);
2902 assert(m->object != kernel_object);
2903 assert(m->phys_page != vm_page_guard_addr);
2904 #if DEBUG
2905 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2906 #endif
2907
2908 VM_PAGE_QUEUES_REMOVE(m);
2909
2910 if ( !VM_PAGE_WIRED(m)) {
2911 mach_timespec_t ts;
2912 clock_sec_t sec;
2913 clock_nsec_t nsec;
2914
2915 clock_get_system_nanotime(&sec, &nsec);
2916 ts.tv_sec = (unsigned int) sec;
2917 ts.tv_nsec = nsec;
2918
2919 if (vm_page_speculative_count == 0) {
2920
2921 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2922 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2923
2924 aq = &vm_page_queue_speculative[speculative_age_index];
2925
2926 /*
2927 * set the timer to begin a new group
2928 */
2929 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2930 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2931
2932 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2933 } else {
2934 aq = &vm_page_queue_speculative[speculative_age_index];
2935
2936 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2937
2938 speculative_age_index++;
2939
2940 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2941 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2942 if (speculative_age_index == speculative_steal_index) {
2943 speculative_steal_index = speculative_age_index + 1;
2944
2945 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2946 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2947 }
2948 aq = &vm_page_queue_speculative[speculative_age_index];
2949
2950 if (!queue_empty(&aq->age_q))
2951 vm_page_speculate_ageit(aq);
2952
2953 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2954 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2955
2956 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2957 }
2958 }
2959 enqueue_tail(&aq->age_q, &m->pageq);
2960 m->speculative = TRUE;
2961 vm_page_speculative_count++;
2962
2963 if (new == TRUE) {
2964 m->object->pages_created++;
2965 #if DEVELOPMENT || DEBUG
2966 vm_page_speculative_created++;
2967 #endif
2968 }
2969 }
2970 VM_PAGE_CHECK(m);
2971 }
2972
2973
2974 /*
2975 * move pages from the specified aging bin to
2976 * the speculative bin that pageout_scan claims from
2977 *
2978 * The page queues must be locked.
2979 */
2980 void
2981 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2982 {
2983 struct vm_speculative_age_q *sq;
2984 vm_page_t t;
2985
2986 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2987
2988 if (queue_empty(&sq->age_q)) {
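/*
 * The AGED queue is empty: adopt aq's entire chain as sq's
 * contents and point the first and last pages back at sq's
 * queue head.
 */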
2989 sq->age_q.next = aq->age_q.next;
2990 sq->age_q.prev = aq->age_q.prev;
2991
2992 t = (vm_page_t)sq->age_q.next;
2993 t->pageq.prev = &sq->age_q;
2994
2995 t = (vm_page_t)sq->age_q.prev;
2996 t->pageq.next = &sq->age_q;
2997 } else {
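/*
 * The AGED queue already has pages: splice aq's chain onto the
 * tail of sq and fix up the adjoining next/prev pointers.
 */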
2998 t = (vm_page_t)sq->age_q.prev;
2999 t->pageq.next = aq->age_q.next;
3000
3001 t = (vm_page_t)aq->age_q.next;
3002 t->pageq.prev = sq->age_q.prev;
3003
3004 t = (vm_page_t)aq->age_q.prev;
3005 t->pageq.next = &sq->age_q;
3006
3007 sq->age_q.prev = aq->age_q.prev;
3008 }
3009 queue_init(&aq->age_q);
3010 }
3011
3012
3013 void
3014 vm_page_lru(
3015 vm_page_t m)
3016 {
3017 VM_PAGE_CHECK(m);
3018 assert(m->object != kernel_object);
3019 assert(m->phys_page != vm_page_guard_addr);
3020
3021 #if DEBUG
3022 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3023 #endif
3024 if (m->active || m->reference)
3025 return;
3026
3027 if (m->private || (VM_PAGE_WIRED(m)))
3028 return;
3029
3030 m->no_cache = FALSE;
3031
3032 VM_PAGE_QUEUES_REMOVE(m);
3033
3034 assert(!m->laundry);
3035 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3036
3037 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3038 m->inactive = TRUE;
3039
3040 vm_page_inactive_count++;
3041 token_new_pagecount++;
3042 }
3043
3044
3045 void
3046 vm_page_reactivate_all_throttled(void)
3047 {
3048 vm_page_t first_throttled, last_throttled;
3049 vm_page_t first_active;
3050 vm_page_t m;
3051 int extra_active_count;
3052
3053 extra_active_count = 0;
3054 vm_page_lock_queues();
3055 if (! queue_empty(&vm_page_queue_throttled)) {
3056 /*
3057 * Switch "throttled" pages to "active".
3058 */
3059 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3060 VM_PAGE_CHECK(m);
3061 assert(m->throttled);
3062 assert(!m->active);
3063 assert(!m->inactive);
3064 assert(!m->speculative);
3065 assert(!VM_PAGE_WIRED(m));
3066 if (!m->fictitious) {
3067 extra_active_count++;
3068 }
3069 m->throttled = FALSE;
3070 m->active = TRUE;
3071 VM_PAGE_CHECK(m);
3072 }
3073
3074 /*
3075 * Transfer the entire throttled queue to the regular LRU page queues.
3076 * We insert it at the head of the active queue, so that these pages
3077 * get re-evaluated by the LRU algorithm first, since they've been
3078 * completely out of it until now.
3079 */
3080 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3081 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3082 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3083 if (queue_empty(&vm_page_queue_active)) {
3084 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3085 } else {
3086 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3087 }
3088 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3089 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3090 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3091
3092 #if DEBUG
3093 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3094 #endif
3095 queue_init(&vm_page_queue_throttled);
3096 /*
3097 * Adjust the global page counts.
3098 */
3099 vm_page_active_count += extra_active_count;
3100 vm_page_throttled_count = 0;
3101 }
3102 assert(vm_page_throttled_count == 0);
3103 assert(queue_empty(&vm_page_queue_throttled));
3104 vm_page_unlock_queues();
3105 }
3106
3107
3108 /*
3109 * Move pages from the indicated local queue to the global active queue.
3110 * It's OK to fail if we're below the hard limit and force == FALSE;
3111 * the nolocks == TRUE case is to allow this function to be run on
3112 * the hibernate path.
3113 */
3114
3115 void
3116 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3117 {
3118 struct vpl *lq;
3119 vm_page_t first_local, last_local;
3120 vm_page_t first_active;
3121 vm_page_t m;
3122 uint32_t count = 0;
3123
3124 if (vm_page_local_q == NULL)
3125 return;
3126
3127 lq = &vm_page_local_q[lid].vpl_un.vpl;
3128
3129 if (nolocks == FALSE) {
3130 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3131 if ( !vm_page_trylockspin_queues())
3132 return;
3133 } else
3134 vm_page_lockspin_queues();
3135
3136 VPL_LOCK(&lq->vpl_lock);
3137 }
3138 if (lq->vpl_count) {
3139 /*
3140 * Switch "local" pages to "active".
3141 */
3142 assert(!queue_empty(&lq->vpl_queue));
3143
3144 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3145 VM_PAGE_CHECK(m);
3146 assert(m->local);
3147 assert(!m->active);
3148 assert(!m->inactive);
3149 assert(!m->speculative);
3150 assert(!VM_PAGE_WIRED(m));
3151 assert(!m->throttled);
3152 assert(!m->fictitious);
3153
3154 if (m->local_id != lid)
3155 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3156
3157 m->local_id = 0;
3158 m->local = FALSE;
3159 m->active = TRUE;
3160 VM_PAGE_CHECK(m);
3161
3162 count++;
3163 }
3164 if (count != lq->vpl_count)
3165 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3166
3167 /*
3168 * Transfer the entire local queue to the regular LRU page queues.
3169 */
3170 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3171 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3172 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3173
3174 if (queue_empty(&vm_page_queue_active)) {
3175 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3176 } else {
3177 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3178 }
3179 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3180 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3181 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3182
3183 queue_init(&lq->vpl_queue);
3184 /*
3185 * Adjust the global page counts.
3186 */
3187 vm_page_active_count += lq->vpl_count;
3188 lq->vpl_count = 0;
3189 }
3190 assert(queue_empty(&lq->vpl_queue));
3191
3192 if (nolocks == FALSE) {
3193 VPL_UNLOCK(&lq->vpl_lock);
3194 vm_page_unlock_queues();
3195 }
3196 }
3197
3198 /*
3199 * vm_page_part_zero_fill:
3200 *
3201 * Zero-fill a part of the page.
3202 */
3203 void
3204 vm_page_part_zero_fill(
3205 vm_page_t m,
3206 vm_offset_t m_pa,
3207 vm_size_t len)
3208 {
3209 vm_page_t tmp;
3210
3211 VM_PAGE_CHECK(m);
3212 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3213 pmap_zero_part_page(m->phys_page, m_pa, len);
3214 #else
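/*
 * No pmap support for partial zeroing: grab a scratch page, zero it,
 * copy the bytes of 'm' that lie outside [m_pa, m_pa + len) into the
 * scratch page, then copy the whole scratch page back over 'm'.
 */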
3215 while (1) {
3216 tmp = vm_page_grab();
3217 if (tmp == VM_PAGE_NULL) {
3218 vm_page_wait(THREAD_UNINT);
3219 continue;
3220 }
3221 break;
3222 }
3223 vm_page_zero_fill(tmp);
3224 if(m_pa != 0) {
3225 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3226 }
3227 if((m_pa + len) < PAGE_SIZE) {
3228 vm_page_part_copy(m, m_pa + len, tmp,
3229 m_pa + len, PAGE_SIZE - (m_pa + len));
3230 }
3231 vm_page_copy(tmp,m);
3232 VM_PAGE_FREE(tmp);
3233 #endif
3234
3235 }
3236
3237 /*
3238 * vm_page_zero_fill:
3239 *
3240 * Zero-fill the specified page.
3241 */
3242 void
3243 vm_page_zero_fill(
3244 vm_page_t m)
3245 {
3246 XPR(XPR_VM_PAGE,
3247 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3248 m->object, m->offset, m, 0,0);
3249
3250 VM_PAGE_CHECK(m);
3251
3252 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3253 pmap_zero_page(m->phys_page);
3254 }
3255
3256 /*
3257 * vm_page_part_copy:
3258 *
3259 * copy part of one page to another
3260 */
3261
3262 void
3263 vm_page_part_copy(
3264 vm_page_t src_m,
3265 vm_offset_t src_pa,
3266 vm_page_t dst_m,
3267 vm_offset_t dst_pa,
3268 vm_size_t len)
3269 {
3270 VM_PAGE_CHECK(src_m);
3271 VM_PAGE_CHECK(dst_m);
3272
3273 pmap_copy_part_page(src_m->phys_page, src_pa,
3274 dst_m->phys_page, dst_pa, len);
3275 }
3276
3277 /*
3278 * vm_page_copy:
3279 *
3280 * Copy one page to another
3281 *
3282 * ENCRYPTED SWAP:
3283 * The source page should not be encrypted. The caller should
3284 * make sure the page is decrypted first, if necessary.
3285 */
3286
3287 int vm_page_copy_cs_validations = 0;
3288 int vm_page_copy_cs_tainted = 0;
3289
3290 void
3291 vm_page_copy(
3292 vm_page_t src_m,
3293 vm_page_t dest_m)
3294 {
3295 XPR(XPR_VM_PAGE,
3296 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3297 src_m->object, src_m->offset,
3298 dest_m->object, dest_m->offset,
3299 0);
3300
3301 VM_PAGE_CHECK(src_m);
3302 VM_PAGE_CHECK(dest_m);
3303
3304 /*
3305 * ENCRYPTED SWAP:
3306 * The source page should not be encrypted at this point.
3307 * The destination page will therefore not contain encrypted
3308 * data after the copy.
3309 */
3310 if (src_m->encrypted) {
3311 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3312 }
3313 dest_m->encrypted = FALSE;
3314
3315 if (src_m->object != VM_OBJECT_NULL &&
3316 src_m->object->code_signed) {
3317 /*
3318 * We're copying a page from a code-signed object.
3319 * Whoever ends up mapping the copy page might care about
3320 * the original page's integrity, so let's validate the
3321 * source page now.
3322 */
3323 vm_page_copy_cs_validations++;
3324 vm_page_validate_cs(src_m);
3325 }
3326 /*
3327 * Propagate the cs_tainted bit to the copy page. Do not propagate
3328 * the cs_validated bit.
3329 */
3330 dest_m->cs_tainted = src_m->cs_tainted;
3331 if (dest_m->cs_tainted) {
3332 vm_page_copy_cs_tainted++;
3333 }
3334
3335 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3336 }
3337
3338 #if MACH_ASSERT
3339 static void
3340 _vm_page_print(
3341 vm_page_t p)
3342 {
3343 printf("vm_page %p: \n", p);
3344 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3345 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3346 printf(" next=%p\n", p->next);
3347 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3348 printf(" wire_count=%u\n", p->wire_count);
3349
3350 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3351 (p->local ? "" : "!"),
3352 (p->inactive ? "" : "!"),
3353 (p->active ? "" : "!"),
3354 (p->pageout_queue ? "" : "!"),
3355 (p->speculative ? "" : "!"),
3356 (p->laundry ? "" : "!"));
3357 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3358 (p->free ? "" : "!"),
3359 (p->reference ? "" : "!"),
3360 (p->gobbled ? "" : "!"),
3361 (p->private ? "" : "!"),
3362 (p->throttled ? "" : "!"));
3363 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3364 (p->busy ? "" : "!"),
3365 (p->wanted ? "" : "!"),
3366 (p->tabled ? "" : "!"),
3367 (p->fictitious ? "" : "!"),
3368 (p->pmapped ? "" : "!"),
3369 (p->wpmapped ? "" : "!"));
3370 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3371 (p->pageout ? "" : "!"),
3372 (p->absent ? "" : "!"),
3373 (p->error ? "" : "!"),
3374 (p->dirty ? "" : "!"),
3375 (p->cleaning ? "" : "!"),
3376 (p->precious ? "" : "!"),
3377 (p->clustered ? "" : "!"));
3378 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3379 (p->overwriting ? "" : "!"),
3380 (p->restart ? "" : "!"),
3381 (p->unusual ? "" : "!"),
3382 (p->encrypted ? "" : "!"),
3383 (p->encrypted_cleaning ? "" : "!"));
3384 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3385 (p->list_req_pending ? "" : "!"),
3386 (p->dump_cleaning ? "" : "!"),
3387 (p->cs_validated ? "" : "!"),
3388 (p->cs_tainted ? "" : "!"),
3389 (p->no_cache ? "" : "!"));
3390 printf(" %szero_fill\n",
3391 (p->zero_fill ? "" : "!"));
3392
3393 printf("phys_page=0x%x\n", p->phys_page);
3394 }
3395
3396 /*
3397 * Check that the list of pages is ordered by
3398 * ascending physical address and has no holes.
3399 */
3400 static int
3401 vm_page_verify_contiguous(
3402 vm_page_t pages,
3403 unsigned int npages)
3404 {
3405 register vm_page_t m;
3406 unsigned int page_count;
3407 vm_offset_t prev_addr;
3408
3409 prev_addr = pages->phys_page;
3410 page_count = 1;
3411 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3412 if (m->phys_page != prev_addr + 1) {
3413 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3414 m, (long)prev_addr, m->phys_page);
3415 printf("pages %p page_count %d\n", pages, page_count);
3416 panic("vm_page_verify_contiguous: not contiguous!");
3417 }
3418 prev_addr = m->phys_page;
3419 ++page_count;
3420 }
3421 if (page_count != npages) {
3422 printf("pages %p actual count 0x%x but requested 0x%x\n",
3423 pages, page_count, npages);
3424 panic("vm_page_verify_contiguous: count error");
3425 }
3426 return 1;
3427 }
3428
3429
3430 /*
3431 * Check the free lists for proper length etc.
3432 */
3433 static unsigned int
3434 vm_page_verify_free_list(
3435 unsigned int color,
3436 vm_page_t look_for_page,
3437 boolean_t expect_page)
3438 {
3439 unsigned int npages;
3440 vm_page_t m;
3441 vm_page_t prev_m;
3442 boolean_t found_page;
3443
3444 found_page = FALSE;
3445 npages = 0;
3446 prev_m = (vm_page_t) &vm_page_queue_free[color];
3447 queue_iterate(&vm_page_queue_free[color],
3448 m,
3449 vm_page_t,
3450 pageq) {
3451 if (m == look_for_page) {
3452 found_page = TRUE;
3453 }
3454 if ((vm_page_t) m->pageq.prev != prev_m)
3455 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3456 color, npages, m, m->pageq.prev, prev_m);
3457 if ( ! m->free )
3458 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3459 color, npages, m);
3460 if ( ! m->busy )
3461 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3462 color, npages, m);
3463 if ( (m->phys_page & vm_color_mask) != color)
3464 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3465 color, npages, m, m->phys_page & vm_color_mask, color);
3466 ++npages;
3467 prev_m = m;
3468 }
3469 if (look_for_page != VM_PAGE_NULL) {
3470 unsigned int other_color;
3471
3472 if (expect_page && !found_page) {
3473 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3474 color, npages, look_for_page, look_for_page->phys_page);
3475 _vm_page_print(look_for_page);
3476 for (other_color = 0;
3477 other_color < vm_colors;
3478 other_color++) {
3479 if (other_color == color)
3480 continue;
3481 vm_page_verify_free_list(other_color, look_for_page, FALSE);
3482 }
3483 panic("vm_page_verify_free_list(color=%u)\n", color);
3484 }
3485 if (!expect_page && found_page) {
3486 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3487 color, npages, look_for_page, look_for_page->phys_page);
3488 }
3489 }
3490 return npages;
3491 }
3492
3493 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3494 static void
3495 vm_page_verify_free_lists( void )
3496 {
3497 unsigned int color, npages;
3498
3499 if (! vm_page_verify_free_lists_enabled)
3500 return;
3501
3502 npages = 0;
3503
3504 lck_mtx_lock(&vm_page_queue_free_lock);
3505
3506 for( color = 0; color < vm_colors; color++ ) {
3507 npages += vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
3508 }
3509 if (npages != vm_page_free_count)
3510 panic("vm_page_verify_free_lists: npages %u free_count %d",
3511 npages, vm_page_free_count);
3512
3513 lck_mtx_unlock(&vm_page_queue_free_lock);
3514 }
3515
3516 void
3517 vm_page_queues_assert(
3518 vm_page_t mem,
3519 int val)
3520 {
3521 if (mem->free + mem->active + mem->inactive + mem->speculative +
3522 mem->throttled + mem->pageout_queue > (val)) {
3523 _vm_page_print(mem);
3524 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3525 }
3526 if (VM_PAGE_WIRED(mem)) {
3527 assert(!mem->active);
3528 assert(!mem->inactive);
3529 assert(!mem->speculative);
3530 assert(!mem->throttled);
3531 }
3532 }
3533 #endif /* MACH_ASSERT */
3534
3535
3536 /*
3537 * CONTIGUOUS PAGE ALLOCATION
3538 *
3539 * Find a region large enough to contain at least n pages
3540 * of contiguous physical memory.
3541 *
3542 * This is done by traversing the vm_page_t array in a linear fashion...
3543 * we assume that the vm_page_t array has the available physical pages in an
3544 * ordered, ascending list... this is currently true of all our implementations
3545 * and must remain so... there can be 'holes' in the array... we also can
3546 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
3547 * which used to happen via 'vm_page_convert'... that function was no longer
3548 * being called and was removed...
3549 *
3550 * The basic flow consists of stabilizing some of the interesting state of
3551 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3552 * sweep at the beginning of the array looking for pages that meet our criteria
3553 * for a 'stealable' page... currently we are pretty conservative... if the page
3554 * meets these criteria and is physically contiguous to the previous page in the 'run'
3555 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3556 * and start to develop a new run... if at this point we've already considered
3557 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3558 * and mutex_pause (which will yield the processor), to keep the latency low with respect
3559 * to other threads trying to acquire free pages (or move pages from q to q),
3560 * and then continue from the spot we left off... we only make 1 pass through the
3561 * array. Once we have a 'run' that is long enough, we'll go into the loop
3562 * which steals the pages from the queues they're currently on... pages on the free
3563 * queue can be stolen directly... pages that are on any of the other queues
3564 * must be removed from the object they are tabled on... this requires taking the
3565 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3566 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3567 * dump the pages we've currently stolen back to the free list, and pick up our
3568 * scan from the point where we aborted the 'current' run.
3569 *
3570 *
3571 * Requirements:
3572 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3573 *
3574 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3575 *
3576 * Algorithm:
3577 */
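/*
 * Parameter sketch (illustrative, based on the checks below): contig_pages
 * is the run length in pages, max_pnum (if non-zero) caps the highest
 * physical page number we will accept, and pnum_mask constrains the
 * alignment of the first page of a run: a run only starts on a page
 * whose physical page number has none of the pnum_mask bits set.  For
 * example, with 4K pages a caller wanting a run aligned on a 64K
 * physical boundary would pass pnum_mask = 0xF.
 */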
3578
3579 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3580
3581
3582 #define RESET_STATE_OF_RUN() \
3583 MACRO_BEGIN \
3584 prevcontaddr = -2; \
3585 start_pnum = -1; \
3586 free_considered = 0; \
3587 substitute_needed = 0; \
3588 npages = 0; \
3589 MACRO_END
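/*
 * Note: prevcontaddr is reset to -2 rather than -1, presumably so that
 * prevcontaddr + 1 is not 0 and physical page 0 cannot be mistaken for
 * the continuation of a (nonexistent) previous run.
 */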
3590
3591 /*
3592 * Can we steal in-use (i.e. not free) pages when searching for
3593 * physically-contiguous pages ?
3594 */
3595 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3596
3597 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3598 #if DEBUG
3599 int vm_page_find_contig_debug = 0;
3600 #endif
3601
3602 static vm_page_t
3603 vm_page_find_contiguous(
3604 unsigned int contig_pages,
3605 ppnum_t max_pnum,
3606 ppnum_t pnum_mask,
3607 boolean_t wire,
3608 int flags)
3609 {
3610 vm_page_t m = NULL;
3611 ppnum_t prevcontaddr;
3612 ppnum_t start_pnum;
3613 unsigned int npages, considered, scanned;
3614 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3615 unsigned int idx_last_contig_page_found = 0;
3616 int free_considered, free_available;
3617 int substitute_needed;
3618 boolean_t wrapped;
3619 #if DEBUG
3620 clock_sec_t tv_start_sec, tv_end_sec;
3621 clock_usec_t tv_start_usec, tv_end_usec;
3622 #endif
3623 #if MACH_ASSERT
3624 int yielded = 0;
3625 int dumped_run = 0;
3626 int stolen_pages = 0;
3627 #endif
3628
3629 if (contig_pages == 0)
3630 return VM_PAGE_NULL;
3631
3632 #if MACH_ASSERT
3633 vm_page_verify_free_lists();
3634 #endif
3635 #if DEBUG
3636 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3637 #endif
3638 vm_page_lock_queues();
3639 lck_mtx_lock(&vm_page_queue_free_lock);
3640
3641 RESET_STATE_OF_RUN();
3642
3643 scanned = 0;
3644 considered = 0;
3645 free_available = vm_page_free_count - vm_page_free_reserved;
3646
3647 wrapped = FALSE;
3648
3649 if(flags & KMA_LOMEM)
3650 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3651 else
3652 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3653
3654 orig_last_idx = idx_last_contig_page_found;
3655 last_idx = orig_last_idx;
3656
3657 for (page_idx = last_idx, start_idx = last_idx;
3658 npages < contig_pages && page_idx < vm_pages_count;
3659 page_idx++) {
3660 retry:
3661 if (wrapped &&
3662 npages == 0 &&
3663 page_idx >= orig_last_idx) {
3664 /*
3665 * We're back where we started and we haven't
3666 * found any suitable contiguous range. Let's
3667 * give up.
3668 */
3669 break;
3670 }
3671 scanned++;
3672 m = &vm_pages[page_idx];
3673
3674 assert(!m->fictitious);
3675 assert(!m->private);
3676
3677 if (max_pnum && m->phys_page > max_pnum) {
3678 /* no more low pages... */
3679 break;
3680 }
3681 if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend &&
3682 m->phys_page >= vm_lopage_poolstart) {
3683 /*
3684 * don't want to take pages from our
3685 * reserved pool of low memory
3686 * so don't consider it which
3687 * means starting a new run
3688 */
3689 RESET_STATE_OF_RUN();
3690
3691 } else if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3692 /*
3693 * not aligned
3694 */
3695 RESET_STATE_OF_RUN();
3696
3697 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3698 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3699 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3700 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3701 m->pageout) {
3702 /*
3703 * page is in a transient state
3704 * or a state we don't want to deal
3705 * with, so don't consider it which
3706 * means starting a new run
3707 */
3708 RESET_STATE_OF_RUN();
3709
3710 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3711 /*
3712 * page needs to be on one of our queues
3713 * in order for it to be stable behind the
3714 * locks we hold at this point...
3715 * if not, don't consider it which
3716 * means starting a new run
3717 */
3718 RESET_STATE_OF_RUN();
3719
3720 } else if (!m->free && (!m->tabled || m->busy)) {
3721 /*
3722 * pages on the free list are always 'busy'
3723 * so we couldn't test for 'busy' in the check
3724 * for the transient states... pages that are
3725 * 'free' are never 'tabled', so we also couldn't
3726 * test for 'tabled'. So we check here to make
3727 * sure that a non-free page is not busy and is
3728 * tabled on an object...
3729 * if not, don't consider it which
3730 * means starting a new run
3731 */
3732 RESET_STATE_OF_RUN();
3733
3734 } else {
3735 if (m->phys_page != prevcontaddr + 1) {
3736 if ((m->phys_page & pnum_mask) != 0) {
3737 RESET_STATE_OF_RUN();
3738 goto did_consider;
3739 } else {
3740 npages = 1;
3741 start_idx = page_idx;
3742 start_pnum = m->phys_page;
3743 }
3744 } else {
3745 npages++;
3746 }
3747 prevcontaddr = m->phys_page;
3748
3749 VM_PAGE_CHECK(m);
3750 if (m->free) {
3751 free_considered++;
3752 } else {
3753 /*
3754 * This page is not free.
3755 * If we can't steal used pages,
3756 * we have to give up this run
3757 * and keep looking.
3758 * Otherwise, we might need to
3759 * move the contents of this page
3760 * into a substitute page.
3761 */
3762 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3763 if (m->pmapped || m->dirty) {
3764 substitute_needed++;
3765 }
3766 #else
3767 RESET_STATE_OF_RUN();
3768 #endif
3769 }
3770
3771 if ((free_considered + substitute_needed) > free_available) {
3772 /*
3773 * if we let this run continue
3774 * we will end up dropping the vm_page_free_count
3775 * below the reserve limit... we need to abort
3776 * this run, but we can at least re-consider this
3777 * page... thus the jump back to 'retry'
3778 */
3779 RESET_STATE_OF_RUN();
3780
3781 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3782 considered++;
3783 goto retry;
3784 }
3785 /*
3786 * free_available == 0
3787 * so can't consider any free pages... if
3788 * we went to retry in this case, we'd
3789 * get stuck looking at the same page
3790 * w/o making any forward progress
3791 * we also want to take this path if we've already
3792 * reached our limit that controls the lock latency
3793 */
3794 }
3795 }
3796 did_consider:
3797 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3798
3799 lck_mtx_unlock(&vm_page_queue_free_lock);
3800 vm_page_unlock_queues();
3801
3802 mutex_pause(0);
3803
3804 vm_page_lock_queues();
3805 lck_mtx_lock(&vm_page_queue_free_lock);
3806
3807 RESET_STATE_OF_RUN();
3808 /*
3809 * reset our free page limit since we
3810 * dropped the lock protecting the vm_page_free_queue
3811 */
3812 free_available = vm_page_free_count - vm_page_free_reserved;
3813 considered = 0;
3814 #if MACH_ASSERT
3815 yielded++;
3816 #endif
3817 goto retry;
3818 }
3819 considered++;
3820 }
3821 m = VM_PAGE_NULL;
3822
3823 if (npages != contig_pages) {
3824 if (!wrapped) {
3825 /*
3826 * We didn't find a contiguous range but we didn't
3827 * start from the very first page.
3828 * Start again from the very first page.
3829 */
3830 RESET_STATE_OF_RUN();
3831 if( flags & KMA_LOMEM)
3832 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3833 else
3834 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3835 last_idx = 0;
3836 page_idx = last_idx;
3837 wrapped = TRUE;
3838 goto retry;
3839 }
3840 lck_mtx_unlock(&vm_page_queue_free_lock);
3841 } else {
3842 vm_page_t m1;
3843 vm_page_t m2;
3844 unsigned int cur_idx;
3845 unsigned int tmp_start_idx;
3846 vm_object_t locked_object = VM_OBJECT_NULL;
3847 boolean_t abort_run = FALSE;
3848
3849 assert(page_idx - start_idx == contig_pages);
3850
3851 tmp_start_idx = start_idx;
3852
3853 /*
3854 * first pass through to pull the free pages
3855 * off of the free queue so that in case we
3856 * need substitute pages, we won't grab any
3857 * of the free pages in the run... we clear
3858 * the 'free' bit here in the 1st pass, and even in
3859 * an abort_run case, we'll collect all of the
3860 * free pages in this run and return them to the free list
3861 */
3862 while (start_idx < page_idx) {
3863
3864 m1 = &vm_pages[start_idx++];
3865
3866 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3867 assert(m1->free);
3868 #endif
3869
3870 if (m1->free) {
3871 unsigned int color;
3872
3873 color = m1->phys_page & vm_color_mask;
3874 #if MACH_ASSERT
3875 vm_page_verify_free_list(color, m1, TRUE);
3876 #endif
3877 queue_remove(&vm_page_queue_free[color],
3878 m1,
3879 vm_page_t,
3880 pageq);
3881 m1->pageq.next = NULL;
3882 m1->pageq.prev = NULL;
3883 #if MACH_ASSERT
3884 vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
3885 #endif
3886 /*
3887 * Clear the "free" bit so that this page
3888 * does not get considered for another
3889 * concurrent physically-contiguous allocation.
3890 */
3891 m1->free = FALSE;
3892 assert(m1->busy);
3893
3894 vm_page_free_count--;
3895 }
3896 }
3897 /*
3898 * adjust global freelist counts
3899 */
3900 if (vm_page_free_count < vm_page_free_count_minimum)
3901 vm_page_free_count_minimum = vm_page_free_count;
3902
3903 if( flags & KMA_LOMEM)
3904 vm_page_lomem_find_contiguous_last_idx = page_idx;
3905 else
3906 vm_page_find_contiguous_last_idx = page_idx;
3907
3908 /*
3909 * we can drop the free queue lock at this point since
3910 * we've pulled any 'free' candidates off of the list
3911 * we need it dropped so that we can do a vm_page_grab
3912 * when substituting for pmapped/dirty pages
3913 */
3914 lck_mtx_unlock(&vm_page_queue_free_lock);
3915
3916 start_idx = tmp_start_idx;
3917 cur_idx = page_idx - 1;
3918
3919 while (start_idx++ < page_idx) {
3920 /*
3921 * must go through the list from back to front
3922 * so that the page list is created in the
3923 * correct order - low -> high phys addresses
3924 */
3925 m1 = &vm_pages[cur_idx--];
3926
3927 assert(!m1->free);
3928 if (m1->object == VM_OBJECT_NULL) {
3929 /*
3930 * page has already been removed from
3931 * the free list in the 1st pass
3932 */
3933 assert(m1->offset == (vm_object_offset_t) -1);
3934 assert(m1->busy);
3935 assert(!m1->wanted);
3936 assert(!m1->laundry);
3937 } else {
3938 vm_object_t object;
3939
3940 if (abort_run == TRUE)
3941 continue;
3942
3943 object = m1->object;
3944
3945 if (object != locked_object) {
3946 if (locked_object) {
3947 vm_object_unlock(locked_object);
3948 locked_object = VM_OBJECT_NULL;
3949 }
3950 if (vm_object_lock_try(object))
3951 locked_object = object;
3952 }
3953 if (locked_object == VM_OBJECT_NULL ||
3954 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3955 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3956 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3957 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3958
3959 if (locked_object) {
3960 vm_object_unlock(locked_object);
3961 locked_object = VM_OBJECT_NULL;
3962 }
3963 tmp_start_idx = cur_idx;
3964 abort_run = TRUE;
3965 continue;
3966 }
3967 if (m1->pmapped || m1->dirty) {
3968 int refmod;
3969 vm_object_offset_t offset;
3970
3971 m2 = vm_page_grab();
3972
3973 if (m2 == VM_PAGE_NULL) {
3974 if (locked_object) {
3975 vm_object_unlock(locked_object);
3976 locked_object = VM_OBJECT_NULL;
3977 }
3978 tmp_start_idx = cur_idx;
3979 abort_run = TRUE;
3980 continue;
3981 }
3982 if (m1->pmapped)
3983 refmod = pmap_disconnect(m1->phys_page);
3984 else
3985 refmod = 0;
3986 vm_page_copy(m1, m2);
3987
3988 m2->reference = m1->reference;
3989 m2->dirty = m1->dirty;
3990
3991 if (refmod & VM_MEM_REFERENCED)
3992 m2->reference = TRUE;
3993 if (refmod & VM_MEM_MODIFIED)
3994 m2->dirty = TRUE;
3995 offset = m1->offset;
3996
3997 /*
3998 * completely cleans up the state
3999 * of the page so that it is ready
4000 * to be put onto the free list or,
4001 * for our purposes here, to look as if it
4002 * had just come off of the free list
4003 */
4004 vm_page_free_prepare(m1);
4005
4006 /*
4007 * make sure we clear the ref/mod state
4008 * from the pmap layer... else we risk
4009 * inheriting state from the last time
4010 * this page was used...
4011 */
4012 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4013 /*
4014 * now put the substitute page on the object
4015 */
4016 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
4017
4018 if (m2->reference)
4019 vm_page_activate(m2);
4020 else
4021 vm_page_deactivate(m2);
4022
4023 PAGE_WAKEUP_DONE(m2);
4024
4025 } else {
4026 /*
4027 * completely cleans up the state
4028 * of the page so that it is ready
4029 * to be put onto the free list or,
4030 * for our purposes here, to look as if it
4031 * had just come off of the free list
4032 */
4033 vm_page_free_prepare(m1);
4034 }
4035 #if MACH_ASSERT
4036 stolen_pages++;
4037 #endif
4038 }
4039 m1->pageq.next = (queue_entry_t) m;
4040 m1->pageq.prev = NULL;
4041 m = m1;
4042 }
4043 if (locked_object) {
4044 vm_object_unlock(locked_object);
4045 locked_object = VM_OBJECT_NULL;
4046 }
4047
4048 if (abort_run == TRUE) {
4049 if (m != VM_PAGE_NULL) {
4050 vm_page_free_list(m, FALSE);
4051 }
4052 #if MACH_ASSERT
4053 dumped_run++;
4054 #endif
4055 /*
4056 * tmp_start_idx was left 1 below the page that
4057 * stopped this run (because of the auto-decrement
4058 * on use), so add 1 to get back to that page
4059 * and 1 more so the scan resumes just past it
4060 * on the retry
4061 */
4062 page_idx = tmp_start_idx + 2;
4063 if (page_idx >= vm_pages_count) {
4064 if (wrapped)
4065 goto done_scanning;
4066 page_idx = last_idx = 0;
4067 wrapped = TRUE;
4068 }
4069 abort_run = FALSE;
4070
4071 /*
4072 * We couldn't complete this run, so resume the
4073 * scan just past the page that stopped us (or
4074 * from the first page if we've wrapped around).
4075 */
4076 RESET_STATE_OF_RUN();
4077
4078 if( flags & KMA_LOMEM)
4079 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4080 else
4081 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4082
4083 last_idx = page_idx;
4084
4085 lck_mtx_lock(&vm_page_queue_free_lock);
4086 /*
4087 * reset our free page limit since we
4088 * dropped the lock protecting the vm_page_free_queue
4089 */
4090 free_available = vm_page_free_count - vm_page_free_reserved;
4091 goto retry;
4092 }
4093
4094 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4095
4096 if (wire == TRUE)
4097 m1->wire_count++;
4098 else
4099 m1->gobbled = TRUE;
4100 }
4101 if (wire == FALSE)
4102 vm_page_gobble_count += npages;
4103
4104 /*
4105 * gobbled pages are also counted as wired pages
4106 */
4107 vm_page_wire_count += npages;
4108
4109 assert(vm_page_verify_contiguous(m, npages));
4110 }
4111 done_scanning:
4112 vm_page_unlock_queues();
4113
4114 #if DEBUG
4115 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4116
4117 tv_end_sec -= tv_start_sec;
4118 if (tv_end_usec < tv_start_usec) {
4119 tv_end_sec--;
4120 tv_end_usec += 1000000;
4121 }
4122 tv_end_usec -= tv_start_usec;
4123 if (tv_end_usec >= 1000000) {
4124 tv_end_sec++;
4125 tv_end_usec -= 1000000;
4126 }
4127 if (vm_page_find_contig_debug) {
4128 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4129 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4130 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4131 scanned, yielded, dumped_run, stolen_pages);
4132 }
4133
4134 #endif
4135 #if MACH_ASSERT
4136 vm_page_verify_free_lists();
4137 #endif
4138 return m;
4139 }
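/*
 * Editorial note: the run detection in vm_page_find_contiguous() above
 * reduces to a small state machine over physical page numbers.  The
 * sketch below is an illustrative distillation only; it is not part of
 * the original source, and the helper name and its array-of-pnums input
 * are hypothetical.  A run only grows while page numbers stay strictly
 * consecutive, and a new run may only start on a page whose number has
 * all of the pnum_mask bits clear (i.e. on the requested alignment).
 */
#if 0	/* illustrative sketch, not compiled */
static unsigned int
contig_run_sketch(const ppnum_t *pnums, unsigned int count,
		  unsigned int want, ppnum_t pnum_mask)
{
	unsigned int	npages = 0;		/* length of the current run */
	unsigned int	start_idx = 0;		/* index where the run began */
	ppnum_t		prevcontaddr = 0;	/* last page number examined */
	unsigned int	i;

	for (i = 0; i < count && npages < want; i++) {
		if (npages > 0 && pnums[i] == prevcontaddr + 1) {
			/* physically contiguous with the current run */
			npages++;
		} else if ((pnums[i] & pnum_mask) == 0) {
			/* run broken, but this page is suitably aligned:
			 * start a fresh run here */
			npages = 1;
			start_idx = i;
		} else {
			/* run broken and misaligned: no run can start here */
			npages = 0;
		}
		prevcontaddr = pnums[i];
	}
	return (npages == want) ? start_idx : count;	/* count => not found */
}
#endif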
4140
4141 /*
4142 * Allocate a list of contiguous, wired pages.
4143 */
4144 kern_return_t
4145 cpm_allocate(
4146 vm_size_t size,
4147 vm_page_t *list,
4148 ppnum_t max_pnum,
4149 ppnum_t pnum_mask,
4150 boolean_t wire,
4151 int flags)
4152 {
4153 vm_page_t pages;
4154 unsigned int npages;
4155
4156 if (size % page_size != 0)
4157 return KERN_INVALID_ARGUMENT;
4158
4159 npages = (unsigned int) (size / PAGE_SIZE);
4160 if (npages != size / PAGE_SIZE) {
4161 /* 32-bit overflow */
4162 return KERN_INVALID_ARGUMENT;
4163 }
4164
4165 /*
4166 * Obtain a pointer to a subset of the free
4167 * list large enough to satisfy the request;
4168 * the region will be physically contiguous.
4169 */
4170 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4171
4172 if (pages == VM_PAGE_NULL)
4173 return KERN_NO_SPACE;
4174 /*
4175 * determine need for wakeups
4176 */
4177 if ((vm_page_free_count < vm_page_free_min) ||
4178 ((vm_page_free_count < vm_page_free_target) &&
4179 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4180 thread_wakeup((event_t) &vm_page_free_wanted);
4181
4182 #if CONFIG_EMBEDDED
4183 {
4184 int percent_avail;
4185
4186 /*
4187 * Decide if we need to poke the memorystatus notification thread.
4188 */
4189 percent_avail =
4190 (vm_page_active_count + vm_page_inactive_count +
4191 vm_page_speculative_count + vm_page_free_count +
4192 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
4193 atop_64(max_mem);
4194 if (percent_avail <= (kern_memorystatus_level - 5)) {
4195 kern_memorystatus_level = percent_avail;
4196 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4197 }
4198 }
4199 #endif
4200 /*
4201 * The CPM pages should now be available and
4202 * ordered by ascending physical address.
4203 */
4204 assert(vm_page_verify_contiguous(pages, npages));
4205
4206 *list = pages;
4207 return KERN_SUCCESS;
4208 }
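/*
 * Editorial note: a minimal, hypothetical caller of cpm_allocate() for
 * illustration; it is not part of the original source.  The wrapper name
 * and the DMA-style constraints (pages below 4GB, run starting on a 64KB
 * physical boundary) are assumptions chosen for the example.
 */
#if 0	/* illustrative sketch, not compiled */
static kern_return_t
example_grab_contig_buffer(vm_size_t size, vm_page_t *page_list_out)
{
	kern_return_t	kr;

	/*
	 * max_pnum  : highest acceptable physical page number (below 4GB here).
	 * pnum_mask : bits that must be clear in the first page's number,
	 *             i.e. the run starts on a 64KB physical boundary.
	 * wire=TRUE : the pages come back wired rather than gobbled.
	 * flags     : 0, or KMA_LOMEM for low-memory requests (as tested above).
	 */
	kr = cpm_allocate(size,
			  page_list_out,
			  (ppnum_t)((0x100000000ULL >> PAGE_SHIFT) - 1),
			  (ppnum_t)((0x10000 >> PAGE_SHIFT) - 1),
			  TRUE,
			  0);

	if (kr != KERN_SUCCESS)
		*page_list_out = VM_PAGE_NULL;
	return kr;
}
#endif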
4209
4210 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4211
4212 static vm_page_t hibernate_gobble_queue;
4213
4214 static void
4215 hibernate_page_list_zero(hibernate_page_list_t *list)
4216 {
4217 uint32_t bank;
4218 hibernate_bitmap_t * bitmap;
4219
4220 bitmap = &list->bank_bitmap[0];
4221 for (bank = 0; bank < list->bank_count; bank++)
4222 {
4223 uint32_t last_bit;
4224
4225 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4226 // set out-of-bound bits at end of bitmap.
4227 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4228 if (last_bit)
4229 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4230
4231 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4232 }
4233 }
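/*
 * Editorial note -- worked example of the trailing-bit fixup above,
 * under the assumption that pages are numbered MSB-first within each
 * 32-bit bitmap word: a bank covering 40 pages has bitmapwords = 2 and
 * last_bit = 40 & 31 = 8, so the final word is set to
 * 0xFFFFFFFF >> 8 = 0x00FFFFFF.  That pre-sets the 24 bit positions
 * that lie past the bank's last page; since a zero bit means "this page
 * must be saved", the out-of-range positions are thereby marked as not
 * needing to be saved.
 */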
4234
4235 void
4236 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4237 {
4238 uint32_t i;
4239 vm_page_t m;
4240 uint64_t start, end, timeout, nsec;
4241 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4242 clock_get_uptime(&start);
4243
4244 for (i = 0; i < gobble_count; i++)
4245 {
4246 while (VM_PAGE_NULL == (m = vm_page_grab()))
4247 {
4248 clock_get_uptime(&end);
4249 if (end >= timeout)
4250 break;
4251 VM_PAGE_WAIT();
4252 }
4253 if (!m)
4254 break;
4255 m->busy = FALSE;
4256 vm_page_gobble(m);
4257
4258 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4259 hibernate_gobble_queue = m;
4260 }
4261
4262 clock_get_uptime(&end);
4263 absolutetime_to_nanoseconds(end - start, &nsec);
4264 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4265 }
4266
4267 void
4268 hibernate_free_gobble_pages(void)
4269 {
4270 vm_page_t m, next;
4271 uint32_t count = 0;
4272
4273 m = (vm_page_t) hibernate_gobble_queue;
4274 while(m)
4275 {
4276 next = (vm_page_t) m->pageq.next;
4277 vm_page_free(m);
4278 count++;
4279 m = next;
4280 }
4281 hibernate_gobble_queue = VM_PAGE_NULL;
4282
4283 if (count)
4284 HIBLOG("Freed %d pages\n", count);
4285 }
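/*
 * Editorial note: the two routines above are intended to be used as a
 * pair.  The sketch below is illustrative only; the wrapper name and
 * the 30% / 20-second policy values are hypothetical, not taken from
 * the original source.  Note that hibernate_gobble_pages() takes its
 * time limit in milliseconds (see the scale factor it passes to
 * clock_interval_to_deadline() above).
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_gobble_for_hibernate(boolean_t entering)
{
	if (entering) {
		/*
		 * Pull roughly 30% of the currently free pages onto the
		 * private gobble queue, waiting at most 20 seconds for
		 * them.  Grabbed pages stay off the free list until
		 * hibernate_free_gobble_pages() returns them.
		 */
		hibernate_gobble_pages(vm_page_free_count * 30 / 100,
				       20 * 1000 /* ms */);
	} else {
		/* on wake (or on failure), give every gobbled page back */
		hibernate_free_gobble_pages();
	}
}
#endif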
4286
4287 static boolean_t
4288 hibernate_consider_discard(vm_page_t m)
4289 {
4290 vm_object_t object = NULL;
4291 int refmod_state;
4292 boolean_t discard = FALSE;
4293
4294 do
4295 {
4296 if(m->private)
4297 panic("hibernate_consider_discard: private");
4298
4299 if (!vm_object_lock_try(m->object))
4300 break;
4301
4302 object = m->object;
4303
4304 if (VM_PAGE_WIRED(m))
4305 break;
4306 if (m->precious)
4307 break;
4308
4309 if (m->busy || !object->alive)
4310 /*
4311 * Somebody is playing with this page.
4312 */
4313 break;
4314
4315 if (m->absent || m->unusual || m->error)
4316 /*
4317 * If it's unusual in any way, ignore it
4318 */
4319 break;
4320
4321 if (m->cleaning)
4322 break;
4323
4324 if (m->laundry || m->list_req_pending)
4325 break;
4326
4327 if (!m->dirty)
4328 {
4329 refmod_state = pmap_get_refmod(m->phys_page);
4330
4331 if (refmod_state & VM_MEM_REFERENCED)
4332 m->reference = TRUE;
4333 if (refmod_state & VM_MEM_MODIFIED)
4334 m->dirty = TRUE;
4335 }
4336
4337 /*
4338 * If it's clean or purgeable we can discard the page on wakeup.
4339 */
4340 discard = (!m->dirty)
4341 || (VM_PURGABLE_VOLATILE == object->purgable)
4342 || (VM_PURGABLE_EMPTY == m->object->purgable);
4343 }
4344 while (FALSE);
4345
4346 if (object)
4347 vm_object_unlock(object);
4348
4349 return (discard);
4350 }
4351
4352
4353 static void
4354 hibernate_discard_page(vm_page_t m)
4355 {
4356 if (m->absent || m->unusual || m->error)
4357 /*
4358 * If it's unusual in any way, ignore it
4359 */
4360 return;
4361
4362 if (m->pmapped == TRUE)
4363 {
4364 __unused int refmod_state = pmap_disconnect(m->phys_page);
4365 }
4366
4367 if (m->laundry)
4368 panic("hibernate_discard_page(%p) laundry", m);
4369 if (m->private)
4370 panic("hibernate_discard_page(%p) private", m);
4371 if (m->fictitious)
4372 panic("hibernate_discard_page(%p) fictitious", m);
4373
4374 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4375 {
4376 /* object should be on a queue */
4377 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4378 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4379 assert(old_queue);
4380 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4381 makes sure these locks are uncontended before sleep */
4382 vm_purgeable_token_delete_first(old_queue);
4383 m->object->purgable = VM_PURGABLE_EMPTY;
4384 }
4385
4386 vm_page_free(m);
4387 }
4388
4389 /*
4390 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
4391 pages known to the VM not to need saving are subtracted.
4392 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
4393 */
4394
4395 void
4396 hibernate_page_list_setall(hibernate_page_list_t * page_list,
4397 hibernate_page_list_t * page_list_wired,
4398 uint32_t * pagesOut)
4399 {
4400 uint64_t start, end, nsec;
4401 vm_page_t m;
4402 uint32_t pages = page_list->page_count;
4403 uint32_t count_zf = 0, count_throttled = 0;
4404 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4405 uint32_t count_wire = pages;
4406 uint32_t count_discard_active = 0;
4407 uint32_t count_discard_inactive = 0;
4408 uint32_t count_discard_purgeable = 0;
4409 uint32_t count_discard_speculative = 0;
4410 uint32_t i;
4411 uint32_t bank;
4412 hibernate_bitmap_t * bitmap;
4413 hibernate_bitmap_t * bitmap_wired;
4414
4415
4416 HIBLOG("hibernate_page_list_setall start\n");
4417
4418 clock_get_uptime(&start);
4419
4420 hibernate_page_list_zero(page_list);
4421 hibernate_page_list_zero(page_list_wired);
4422
4423 if (vm_page_local_q) {
4424 for (i = 0; i < vm_page_local_q_count; i++)
4425 vm_page_reactivate_local(i, TRUE, TRUE);
4426 }
4427
4428 m = (vm_page_t) hibernate_gobble_queue;
4429 while(m)
4430 {
4431 pages--;
4432 count_wire--;
4433 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4434 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4435 m = (vm_page_t) m->pageq.next;
4436 }
4437
4438 for( i = 0; i < vm_colors; i++ )
4439 {
4440 queue_iterate(&vm_page_queue_free[i],
4441 m,
4442 vm_page_t,
4443 pageq)
4444 {
4445 pages--;
4446 count_wire--;
4447 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4448 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4449 }
4450 }
4451
4452 queue_iterate(&vm_lopage_queue_free,
4453 m,
4454 vm_page_t,
4455 pageq)
4456 {
4457 pages--;
4458 count_wire--;
4459 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4460 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4461 }
4462
4463 queue_iterate( &vm_page_queue_throttled,
4464 m,
4465 vm_page_t,
4466 pageq )
4467 {
4468 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4469 && hibernate_consider_discard(m))
4470 {
4471 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4472 count_discard_inactive++;
4473 }
4474 else
4475 count_throttled++;
4476 count_wire--;
4477 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4478 }
4479
4480 queue_iterate( &vm_page_queue_zf,
4481 m,
4482 vm_page_t,
4483 pageq )
4484 {
4485 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4486 && hibernate_consider_discard(m))
4487 {
4488 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4489 if (m->dirty)
4490 count_discard_purgeable++;
4491 else
4492 count_discard_inactive++;
4493 }
4494 else
4495 count_zf++;
4496 count_wire--;
4497 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4498 }
4499
4500 queue_iterate( &vm_page_queue_inactive,
4501 m,
4502 vm_page_t,
4503 pageq )
4504 {
4505 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4506 && hibernate_consider_discard(m))
4507 {
4508 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4509 if (m->dirty)
4510 count_discard_purgeable++;
4511 else
4512 count_discard_inactive++;
4513 }
4514 else
4515 count_inactive++;
4516 count_wire--;
4517 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4518 }
4519
4520 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4521 {
4522 queue_iterate(&vm_page_queue_speculative[i].age_q,
4523 m,
4524 vm_page_t,
4525 pageq)
4526 {
4527 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4528 && hibernate_consider_discard(m))
4529 {
4530 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4531 count_discard_speculative++;
4532 }
4533 else
4534 count_speculative++;
4535 count_wire--;
4536 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4537 }
4538 }
4539
4540 queue_iterate( &vm_page_queue_active,
4541 m,
4542 vm_page_t,
4543 pageq )
4544 {
4545 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
4546 && hibernate_consider_discard(m))
4547 {
4548 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4549 if (m->dirty)
4550 count_discard_purgeable++;
4551 else
4552 count_discard_active++;
4553 }
4554 else
4555 count_active++;
4556 count_wire--;
4557 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4558 }
4559
4560 // pull wired from hibernate_bitmap
4561
4562 bitmap = &page_list->bank_bitmap[0];
4563 bitmap_wired = &page_list_wired->bank_bitmap[0];
4564 for (bank = 0; bank < page_list->bank_count; bank++)
4565 {
4566 for (i = 0; i < bitmap->bitmapwords; i++)
4567 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
4568 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
4569 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
4570 }
4571
4572 // machine dependent adjustments
4573 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
4574
4575 clock_get_uptime(&end);
4576 absolutetime_to_nanoseconds(end - start, &nsec);
4577 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
4578
4579 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
4580 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
4581 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4582
4583 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
4584 }
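/*
 * Editorial note: an illustrative consumer of the bitmaps built by
 * hibernate_page_list_setall() above.  It re-derives the "pages to
 * save" count by walking every bank and counting zero bits (a zero bit
 * means the page must be saved).  The function name is hypothetical and
 * the routine is not part of the original source.
 */
#if 0	/* illustrative sketch, not compiled */
static uint32_t
example_count_pages_to_save(hibernate_page_list_t * page_list)
{
	hibernate_bitmap_t * bitmap;
	uint32_t bank, i, bit;
	uint32_t to_save = 0;

	bitmap = &page_list->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
		for (i = 0; i < bitmap->bitmapwords; i++)
		{
			uint32_t word = bitmap->bitmap[i];

			/* out-of-range trailing bits were pre-set by
			   hibernate_page_list_zero(), so they are never
			   counted here */
			for (bit = 0; bit < 32; bit++)
				if (!(word & (1u << bit)))
					to_save++;
		}
		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
	return (to_save);
}
#endif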
4585
4586 void
4587 hibernate_page_list_discard(hibernate_page_list_t * page_list)
4588 {
4589 uint64_t start, end, nsec;
4590 vm_page_t m;
4591 vm_page_t next;
4592 uint32_t i;
4593 uint32_t count_discard_active = 0;
4594 uint32_t count_discard_inactive = 0;
4595 uint32_t count_discard_purgeable = 0;
4596 uint32_t count_discard_speculative = 0;
4597
4598 clock_get_uptime(&start);
4599
4600 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4601 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
4602 {
4603 next = (vm_page_t) m->pageq.next;
4604 if (hibernate_page_bittst(page_list, m->phys_page))
4605 {
4606 if (m->dirty)
4607 count_discard_purgeable++;
4608 else
4609 count_discard_inactive++;
4610 hibernate_discard_page(m);
4611 }
4612 m = next;
4613 }
4614
4615 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4616 {
4617 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
4618 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
4619 {
4620 next = (vm_page_t) m->pageq.next;
4621 if (hibernate_page_bittst(page_list, m->phys_page))
4622 {
4623 count_discard_speculative++;
4624 hibernate_discard_page(m);
4625 }
4626 m = next;
4627 }
4628 }
4629
4630 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4631 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
4632 {
4633 next = (vm_page_t) m->pageq.next;
4634 if (hibernate_page_bittst(page_list, m->phys_page))
4635 {
4636 if (m->dirty)
4637 count_discard_purgeable++;
4638 else
4639 count_discard_inactive++;
4640 hibernate_discard_page(m);
4641 }
4642 m = next;
4643 }
4644
4645 m = (vm_page_t) queue_first(&vm_page_queue_active);
4646 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
4647 {
4648 next = (vm_page_t) m->pageq.next;
4649 if (hibernate_page_bittst(page_list, m->phys_page))
4650 {
4651 if (m->dirty)
4652 count_discard_purgeable++;
4653 else
4654 count_discard_active++;
4655 hibernate_discard_page(m);
4656 }
4657 m = next;
4658 }
4659
4660 clock_get_uptime(&end);
4661 absolutetime_to_nanoseconds(end - start, &nsec);
4662 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
4663 nsec / 1000000ULL,
4664 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4665 }
4666
4667 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4668
4669 #include <mach_vm_debug.h>
4670 #if MACH_VM_DEBUG
4671
4672 #include <mach_debug/hash_info.h>
4673 #include <vm/vm_debug.h>
4674
4675 /*
4676 * Routine: vm_page_info
4677 * Purpose:
4678 * Return information about the global VP table.
4679 * Fills the buffer with as much information as possible
4680 * and returns the desired size of the buffer.
4681 * Conditions:
4682 * Nothing locked. The caller should provide
4683 * possibly-pageable memory.
4684 */
4685
4686 unsigned int
4687 vm_page_info(
4688 hash_info_bucket_t *info,
4689 unsigned int count)
4690 {
4691 unsigned int i;
4692 lck_spin_t *bucket_lock;
4693
4694 if (vm_page_bucket_count < count)
4695 count = vm_page_bucket_count;
4696
4697 for (i = 0; i < count; i++) {
4698 vm_page_bucket_t *bucket = &vm_page_buckets[i];
4699 unsigned int bucket_count = 0;
4700 vm_page_t m;
4701
4702 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
4703 lck_spin_lock(bucket_lock);
4704
4705 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
4706 bucket_count++;
4707
4708 lck_spin_unlock(bucket_lock);
4709
4710 /* don't touch pageable memory while holding locks */
4711 info[i].hib_count = bucket_count;
4712 }
4713
4714 return vm_page_bucket_count;
4715 }
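/*
 * Editorial note: a hypothetical MACH_VM_DEBUG-only caller illustrating
 * the size negotiation vm_page_info() supports: it fills at most 'count'
 * buckets and returns the total bucket count, so a short buffer tells
 * the caller how large a buffer a complete snapshot would need.  The
 * function name and the fixed-size buffer are illustrative only.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_dump_bucket_histogram(void)
{
	hash_info_bucket_t	info[64];
	unsigned int		needed, filled, i;

	needed = vm_page_info(info, 64);
	filled = (needed < 64) ? needed : 64;

	for (i = 0; i < filled; i++)
		printf("bucket %u: %u pages\n", i, info[i].hib_count);

	if (needed > 64)
		printf("(%u buckets total; enlarge the buffer for a full snapshot)\n",
		       needed);
}
#endif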
4716 #endif /* MACH_VM_DEBUG */
4717
4718 #include <mach_kdb.h>
4719 #if MACH_KDB
4720
4721 #include <ddb/db_output.h>
4722 #include <vm/vm_print.h>
4723 #define printf kdbprintf
4724
4725 /*
4726 * Routine: vm_page_print [exported]
4727 */
4728 void
4729 vm_page_print(
4730 db_addr_t db_addr)
4731 {
4732 vm_page_t p;
4733
4734 p = (vm_page_t) (long) db_addr;
4735
4736 iprintf("page 0x%x\n", p);
4737
4738 db_indent += 2;
4739
4740 iprintf("object=0x%x", p->object);
4741 printf(", offset=0x%x", p->offset);
4742 printf(", wire_count=%d", p->wire_count);
4743
4744 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
4745 (p->local ? "" : "!"),
4746 (p->inactive ? "" : "!"),
4747 (p->active ? "" : "!"),
4748 (p->throttled ? "" : "!"),
4749 (p->gobbled ? "" : "!"),
4750 (p->laundry ? "" : "!"),
4751 (p->free ? "" : "!"),
4752 (p->reference ? "" : "!"),
4753 (p->encrypted ? "" : "!"));
4754 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
4755 (p->busy ? "" : "!"),
4756 (p->wanted ? "" : "!"),
4757 (p->tabled ? "" : "!"),
4758 (p->fictitious ? "" : "!"),
4759 (p->private ? "" : "!"),
4760 (p->precious ? "" : "!"));
4761 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
4762 (p->absent ? "" : "!"),
4763 (p->error ? "" : "!"),
4764 (p->dirty ? "" : "!"),
4765 (p->cleaning ? "" : "!"),
4766 (p->pageout ? "" : "!"),
4767 (p->clustered ? "" : "!"));
4768 iprintf("%soverwriting, %srestart, %sunusual\n",
4769 (p->overwriting ? "" : "!"),
4770 (p->restart ? "" : "!"),
4771 (p->unusual ? "" : "!"));
4772
4773 iprintf("phys_page=0x%x", p->phys_page);
4774
4775 db_indent -= 2;
4776 }
4777 #endif /* MACH_KDB */