1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93
94 #include <IOKit/IOHibernatePrivate.h>
95
96
97 #include <sys/kern_memorystatus.h>
98
99 #include <sys/kdebug.h>
100
101 boolean_t vm_page_free_verify = TRUE;
102
103 uint32_t vm_lopage_free_count = 0;
104 uint32_t vm_lopage_free_limit = 0;
105 uint32_t vm_lopage_lowater = 0;
106 boolean_t vm_lopage_refill = FALSE;
107 boolean_t vm_lopage_needed = FALSE;
108
109 lck_mtx_ext_t vm_page_queue_lock_ext;
110 lck_mtx_ext_t vm_page_queue_free_lock_ext;
111 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
112
113 int speculative_age_index = 0;
114 int speculative_steal_index = 0;
115 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
116
117
118 __private_extern__ void vm_page_init_lck_grp(void);
119
120 static void vm_page_free_prepare(vm_page_t page);
121 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
122
123
124
125
126 /*
127 * Associated with each page of user-allocatable memory is a
128 * page structure.
129 */
130
131 /*
132 * These variables record the values returned by vm_page_bootstrap,
133 * for debugging purposes. The implementation of pmap_steal_memory
134 * and pmap_startup here also uses them internally.
135 */
136
137 vm_offset_t virtual_space_start;
138 vm_offset_t virtual_space_end;
139 uint32_t vm_page_pages;
140
141 /*
142 * The vm_page_lookup() routine, which provides for fast
143 * (virtual memory object, offset) to page lookup, employs
144 * the following hash table. The vm_page_{insert,remove}
145 * routines install and remove associations in the table.
146 * [This table is often called the virtual-to-physical,
147 * or VP, table.]
148 */
149 typedef struct {
150 vm_page_t pages;
151 #if MACH_PAGE_HASH_STATS
152 int cur_count; /* current count */
153 int hi_count; /* high water mark */
154 #endif /* MACH_PAGE_HASH_STATS */
155 } vm_page_bucket_t;
156
157
158 #define BUCKETS_PER_LOCK 16
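/*
 * Illustrative note on the lock sharing below: bucket i is guarded by
 * vm_page_bucket_locks[i / BUCKETS_PER_LOCK], so with 16 buckets per
 * lock, buckets 0..15 share lock 0, buckets 16..31 share lock 1, and
 * so on.
 */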
159
160 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
161 unsigned int vm_page_bucket_count = 0; /* How big is array? */
162 unsigned int vm_page_hash_mask; /* Mask for hash function */
163 unsigned int vm_page_hash_shift; /* Shift for hash function */
164 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
165 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
166
167 lck_spin_t *vm_page_bucket_locks;
168
169
170 #if MACH_PAGE_HASH_STATS
171 /* This routine is only for debug. It is intended to be called by
172 * hand by a developer using a kernel debugger. This routine prints
173 * out vm_page_hash table statistics to the kernel debug console.
174 */
175 void
176 hash_debug(void)
177 {
178 int i;
179 int numbuckets = 0;
180 int highsum = 0;
181 int maxdepth = 0;
182
183 for (i = 0; i < vm_page_bucket_count; i++) {
184 if (vm_page_buckets[i].hi_count) {
185 numbuckets++;
186 highsum += vm_page_buckets[i].hi_count;
187 if (vm_page_buckets[i].hi_count > maxdepth)
188 maxdepth = vm_page_buckets[i].hi_count;
189 }
190 }
191 printf("Total number of buckets: %d\n", vm_page_bucket_count);
192 printf("Number used buckets: %d = %d%%\n",
193 numbuckets, 100*numbuckets/vm_page_bucket_count);
194 printf("Number unused buckets: %d = %d%%\n",
195 vm_page_bucket_count - numbuckets,
196 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
197 printf("Sum of bucket max depth: %d\n", highsum);
198 printf("Average bucket depth: %d.%2d\n",
199 highsum/vm_page_bucket_count,
200 highsum%vm_page_bucket_count);
201 printf("Maximum bucket depth: %d\n", maxdepth);
202 }
203 #endif /* MACH_PAGE_HASH_STATS */
204
205 /*
206 * The virtual page size is currently implemented as a runtime
207 * variable, but is constant once initialized using vm_set_page_size.
208 * This initialization must be done in the machine-dependent
209 * bootstrap sequence, before calling other machine-independent
210 * initializations.
211 *
212 * All references to the virtual page size outside this
213 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
214 * constants.
215 */
216 vm_size_t page_size = PAGE_SIZE;
217 vm_size_t page_mask = PAGE_MASK;
218 int page_shift = PAGE_SHIFT;
219
220 /*
221 * Resident page structures are initialized from
222 * a template (see vm_page_alloc).
223 *
224 * When adding a new field to the virtual memory
225 * object structure, be sure to add initialization
226 * (see vm_page_bootstrap).
227 */
228 struct vm_page vm_page_template;
229
230 vm_page_t vm_pages = VM_PAGE_NULL;
231 unsigned int vm_pages_count = 0;
232 ppnum_t vm_page_lowest = 0;
233
234 /*
235 * Resident pages that represent real memory
236 * are allocated from a set of free lists,
237 * one per color.
238 */
239 unsigned int vm_colors;
240 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
241 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
242 queue_head_t vm_page_queue_free[MAX_COLORS];
243 unsigned int vm_page_free_wanted;
244 unsigned int vm_page_free_wanted_privileged;
245 unsigned int vm_page_free_count;
246 unsigned int vm_page_fictitious_count;
247
248 unsigned int vm_page_free_count_minimum; /* debugging */
249
250 /*
251 * Occasionally, the virtual memory system uses
252 * resident page structures that do not refer to
253 * real pages, for example to leave a page with
254 * important state information in the VP table.
255 *
256 * These page structures are allocated the way
257 * most other kernel structures are.
258 */
259 zone_t vm_page_zone;
260 vm_locks_array_t vm_page_locks;
261 decl_lck_mtx_data(,vm_page_alloc_lock)
262 unsigned int io_throttle_zero_fill;
263
264 unsigned int vm_page_local_q_count = 0;
265 unsigned int vm_page_local_q_soft_limit = 250;
266 unsigned int vm_page_local_q_hard_limit = 500;
267 struct vplq *vm_page_local_q = NULL;
268
269 /*
270 * Fictitious pages don't have a physical address,
271 * but we must initialize phys_page to something.
272 * For debugging, this should be a strange value
273 * that the pmap module can recognize in assertions.
274 */
275 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
276
277 /*
278 * Guard pages are not accessible so they don't
279 * need a physical address, but we need to enter
280 * one in the pmap.
281 * Let's make it recognizable and make sure that
282 * we don't use a real physical page with that
283 * physical address.
284 */
285 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
286
287 /*
288 * Resident page structures are also chained on
289 * queues that are used by the page replacement
290 * system (pageout daemon). These queues are
291 * defined here, but are shared by the pageout
292 * module. The inactive queue is broken into
293 * inactive and zf for convenience as the
294 * pageout daemon often assigns a higher
295 * affinity to zf pages.
296 */
297 queue_head_t vm_page_queue_active;
298 queue_head_t vm_page_queue_inactive;
299 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
300 queue_head_t vm_page_queue_throttled;
301
302 unsigned int vm_page_active_count;
303 unsigned int vm_page_inactive_count;
304 unsigned int vm_page_throttled_count;
305 unsigned int vm_page_speculative_count;
306 unsigned int vm_page_wire_count;
307 unsigned int vm_page_wire_count_initial;
308 unsigned int vm_page_gobble_count = 0;
309 unsigned int vm_page_wire_count_warning = 0;
310 unsigned int vm_page_gobble_count_warning = 0;
311
312 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
313 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
314 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
315
316 #if DEVELOPMENT || DEBUG
317 unsigned int vm_page_speculative_recreated = 0;
318 unsigned int vm_page_speculative_created = 0;
319 unsigned int vm_page_speculative_used = 0;
320 #endif
321
322 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
323 ppnum_t max_valid_low_ppnum = 0xffffffff;
324
325
326 /*
327 * Several page replacement parameters are also
328 * shared with this module, so that page allocation
329 * (done here in vm_page_alloc) can trigger the
330 * pageout daemon.
331 */
332 unsigned int vm_page_free_target = 0;
333 unsigned int vm_page_free_min = 0;
334 unsigned int vm_page_throttle_limit = 0;
335 uint32_t vm_page_creation_throttle = 0;
336 unsigned int vm_page_inactive_target = 0;
337 unsigned int vm_page_inactive_min = 0;
338 unsigned int vm_page_free_reserved = 0;
339 unsigned int vm_page_throttle_count = 0;
340
341 /*
342 * The VM system has a couple of heuristics for deciding
343 * that pages are "uninteresting" and should be placed
344 * on the inactive queue as likely candidates for replacement.
345 * These variables let the heuristics be controlled at run-time
346 * to make experimentation easier.
347 */
348
349 boolean_t vm_page_deactivate_hint = TRUE;
350
351 struct vm_page_stats_reusable vm_page_stats_reusable;
352
353 /*
354 * vm_set_page_size:
355 *
356 * Sets the page size, perhaps based upon the memory
357 * size. Must be called before any use of page-size
358 * dependent functions.
359 *
360 * Sets page_shift and page_mask from page_size.
361 */
362 void
363 vm_set_page_size(void)
364 {
365 page_mask = page_size - 1;
366
367 if ((page_mask & page_size) != 0)
368 panic("vm_set_page_size: page size not a power of two");
369
370 for (page_shift = 0; ; page_shift++)
371 if ((1U << page_shift) == page_size)
372 break;
373 }
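/*
 * Worked example (assuming the common 4 KB page size): with
 * page_size == 4096, page_mask becomes 0xFFF and the loop above
 * stops at page_shift == 12, since (1U << 12) == 4096.
 */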
374
375
376 /* Called once during startup, once the cache geometry is known.
377 */
378 static void
379 vm_page_set_colors( void )
380 {
381 unsigned int n, override;
382
383 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
384 n = override;
385 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
386 n = vm_cache_geometry_colors;
387 else n = DEFAULT_COLORS; /* use default if all else fails */
388
389 if ( n == 0 )
390 n = 1;
391 if ( n > MAX_COLORS )
392 n = MAX_COLORS;
393
394 /* the count must be a power of 2 */
395 if ( ( n & (n - 1)) != 0 )
396 panic("vm_page_set_colors");
397
398 vm_colors = n;
399 vm_color_mask = n - 1;
400 }
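/*
 * Example, with purely illustrative numbers: if the cache geometry
 * yields 32 colors, vm_color_mask is 0x1F and a page is binned by the
 * low bits of its physical page number, e.g. phys_page 0x12345 lands
 * on free queue (0x12345 & 0x1F) == 5.  The boot-arg or the detected
 * cache geometry decides the real color count.
 */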
401
402
403 lck_grp_t vm_page_lck_grp_free;
404 lck_grp_t vm_page_lck_grp_queue;
405 lck_grp_t vm_page_lck_grp_local;
406 lck_grp_t vm_page_lck_grp_purge;
407 lck_grp_t vm_page_lck_grp_alloc;
408 lck_grp_t vm_page_lck_grp_bucket;
409 lck_grp_attr_t vm_page_lck_grp_attr;
410 lck_attr_t vm_page_lck_attr;
411
412
413 __private_extern__ void
414 vm_page_init_lck_grp(void)
415 {
416 /*
417 * initialize the vm_page lock world
418 */
419 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
426 lck_attr_setdefault(&vm_page_lck_attr);
427 }
428
429 void
430 vm_page_init_local_q()
431 {
432 unsigned int num_cpus;
433 unsigned int i;
434 struct vplq *t_local_q;
435
436 num_cpus = ml_get_max_cpus();
437
438 /*
439 * no point in this for a uni-processor system
440 */
441 if (num_cpus >= 2) {
442 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
443
444 for (i = 0; i < num_cpus; i++) {
445 struct vpl *lq;
446
447 lq = &t_local_q[i].vpl_un.vpl;
448 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
449 queue_init(&lq->vpl_queue);
450 lq->vpl_count = 0;
451 }
452 vm_page_local_q_count = num_cpus;
453
454 vm_page_local_q = (struct vplq *)t_local_q;
455 }
456 }
457
458
459 /*
460 * vm_page_bootstrap:
461 *
462 * Initializes the resident memory module.
463 *
464 * Allocates memory for the page cells, and
465 * for the object/offset-to-page hash table headers.
466 * Each page cell is initialized and placed on the free list.
467 * Returns the range of available kernel virtual memory.
468 */
469
470 void
471 vm_page_bootstrap(
472 vm_offset_t *startp,
473 vm_offset_t *endp)
474 {
475 register vm_page_t m;
476 unsigned int i;
477 unsigned int log1;
478 unsigned int log2;
479 unsigned int size;
480
481 /*
482 * Initialize the vm_page template.
483 */
484
485 m = &vm_page_template;
486 bzero(m, sizeof (*m));
487
488 m->pageq.next = NULL;
489 m->pageq.prev = NULL;
490 m->listq.next = NULL;
491 m->listq.prev = NULL;
492 m->next = VM_PAGE_NULL;
493
494 m->object = VM_OBJECT_NULL; /* reset later */
495 m->offset = (vm_object_offset_t) -1; /* reset later */
496
497 m->wire_count = 0;
498 m->local = FALSE;
499 m->inactive = FALSE;
500 m->active = FALSE;
501 m->pageout_queue = FALSE;
502 m->speculative = FALSE;
503 m->laundry = FALSE;
504 m->free = FALSE;
505 m->reference = FALSE;
506 m->gobbled = FALSE;
507 m->private = FALSE;
508 m->throttled = FALSE;
509 m->__unused_pageq_bits = 0;
510
511 m->phys_page = 0; /* reset later */
512
513 m->busy = TRUE;
514 m->wanted = FALSE;
515 m->tabled = FALSE;
516 m->fictitious = FALSE;
517 m->pmapped = FALSE;
518 m->wpmapped = FALSE;
519 m->pageout = FALSE;
520 m->absent = FALSE;
521 m->error = FALSE;
522 m->dirty = FALSE;
523 m->cleaning = FALSE;
524 m->precious = FALSE;
525 m->clustered = FALSE;
526 m->overwriting = FALSE;
527 m->restart = FALSE;
528 m->unusual = FALSE;
529 m->encrypted = FALSE;
530 m->encrypted_cleaning = FALSE;
531 m->list_req_pending = FALSE;
532 m->dump_cleaning = FALSE;
533 m->cs_validated = FALSE;
534 m->cs_tainted = FALSE;
535 m->no_cache = FALSE;
536 m->zero_fill = FALSE;
537 m->reusable = FALSE;
538 m->slid = FALSE;
539 m->__unused_object_bits = 0;
540
541
542 /*
543 * Initialize the page queues.
544 */
545 vm_page_init_lck_grp();
546
547 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
548 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
549 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
550
551 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
552 int group;
553
554 purgeable_queues[i].token_q_head = 0;
555 purgeable_queues[i].token_q_tail = 0;
556 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
557 queue_init(&purgeable_queues[i].objq[group]);
558
559 purgeable_queues[i].type = i;
560 purgeable_queues[i].new_pages = 0;
561 #if MACH_ASSERT
562 purgeable_queues[i].debug_count_tokens = 0;
563 purgeable_queues[i].debug_count_objects = 0;
564 #endif
565 };
566
567 for (i = 0; i < MAX_COLORS; i++ )
568 queue_init(&vm_page_queue_free[i]);
569
570 queue_init(&vm_lopage_queue_free);
571 queue_init(&vm_page_queue_active);
572 queue_init(&vm_page_queue_inactive);
573 queue_init(&vm_page_queue_throttled);
574 queue_init(&vm_page_queue_zf);
575
576 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
577 queue_init(&vm_page_queue_speculative[i].age_q);
578
579 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
580 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
581 }
582 vm_page_free_wanted = 0;
583 vm_page_free_wanted_privileged = 0;
584
585 vm_page_set_colors();
586
587
588 /*
589 * Steal memory for the map and zone subsystems.
590 */
591
592 vm_map_steal_memory();
593 zone_steal_memory();
594
595 /*
596 * Allocate (and initialize) the virtual-to-physical
597 * table hash buckets.
598 *
599 * The number of buckets should be a power of two to
600 * get a good hash function. The following computation
601 * chooses the first power of two that is greater than
602 * or equal to the number of physical pages in the system.
603 */
604
605 if (vm_page_bucket_count == 0) {
606 unsigned int npages = pmap_free_pages();
607
608 vm_page_bucket_count = 1;
609 while (vm_page_bucket_count < npages)
610 vm_page_bucket_count <<= 1;
611 }
612 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
613
614 vm_page_hash_mask = vm_page_bucket_count - 1;
615
616 /*
617 * Calculate object shift value for hashing algorithm:
618 * O = log2(sizeof(struct vm_object))
619 * B = log2(vm_page_bucket_count)
620 * hash shifts the object left by
621 * B/2 - O + 1
622 */
623 size = vm_page_bucket_count;
624 for (log1 = 0; size > 1; log1++)
625 size /= 2;
626 size = sizeof(struct vm_object);
627 for (log2 = 0; size > 1; log2++)
628 size /= 2;
629 vm_page_hash_shift = log1/2 - log2 + 1;
630
631 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
632 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
633 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
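	/*
	 * Worked example with hypothetical sizes: with 1 << 20 buckets,
	 * log1 == 20, and if sizeof(struct vm_object) were 128 bytes,
	 * log2 == 7, giving vm_page_hash_shift = 20/2 - 7 + 1 = 4.
	 * The bucket hash seed becomes (1 << 10) | (1 << 5) | 1 == 0x421.
	 */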
634
635 if (vm_page_hash_mask & vm_page_bucket_count)
636 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
637
638 vm_page_buckets = (vm_page_bucket_t *)
639 pmap_steal_memory(vm_page_bucket_count *
640 sizeof(vm_page_bucket_t));
641
642 vm_page_bucket_locks = (lck_spin_t *)
643 pmap_steal_memory(vm_page_bucket_lock_count *
644 sizeof(lck_spin_t));
645
646 for (i = 0; i < vm_page_bucket_count; i++) {
647 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
648
649 bucket->pages = VM_PAGE_NULL;
650 #if MACH_PAGE_HASH_STATS
651 bucket->cur_count = 0;
652 bucket->hi_count = 0;
653 #endif /* MACH_PAGE_HASH_STATS */
654 }
655
656 for (i = 0; i < vm_page_bucket_lock_count; i++)
657 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
658
659 /*
660 * Machine-dependent code allocates the resident page table.
661 * It uses vm_page_init to initialize the page frames.
662 * The code also returns to us the virtual space available
663 * to the kernel. We don't trust the pmap module
664 * to get the alignment right.
665 */
666
667 pmap_startup(&virtual_space_start, &virtual_space_end);
668 virtual_space_start = round_page(virtual_space_start);
669 virtual_space_end = trunc_page(virtual_space_end);
670
671 *startp = virtual_space_start;
672 *endp = virtual_space_end;
673
674 /*
675 * Compute the initial "wire" count.
676 * Up until now, the pages which have been set aside are not under
677 * the VM system's control, so although they aren't explicitly
678 * wired, they nonetheless can't be moved. At this moment,
679 * all VM managed pages are "free", courtesy of pmap_startup.
680 */
681 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
682 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
683 vm_page_wire_count_initial = vm_page_wire_count;
684 vm_page_free_count_minimum = vm_page_free_count;
685
686 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
687 vm_page_free_count, vm_page_wire_count);
688
689 simple_lock_init(&vm_paging_lock, 0);
690 }
691
692 #ifndef MACHINE_PAGES
693 /*
694 * We implement pmap_steal_memory and pmap_startup with the help
695 * of two simpler functions, pmap_virtual_space and pmap_next_page.
696 */
697
698 void *
699 pmap_steal_memory(
700 vm_size_t size)
701 {
702 vm_offset_t addr, vaddr;
703 ppnum_t phys_page;
704
705 /*
706 * Round the requested size up to a multiple of the pointer size.
707 */
708
709 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
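	/*
	 * e.g. on a 64-bit kernel (sizeof (void *) == 8) a request for
	 * 13 bytes is rounded up to (13 + 7) & ~7 == 16 bytes.
	 */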
710
711 /*
712 * If this is the first call to pmap_steal_memory,
713 * we have to initialize ourselves.
714 */
715
716 if (virtual_space_start == virtual_space_end) {
717 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
718
719 /*
720 * The initial values must be aligned properly, and
721 * we don't trust the pmap module to do it right.
722 */
723
724 virtual_space_start = round_page(virtual_space_start);
725 virtual_space_end = trunc_page(virtual_space_end);
726 }
727
728 /*
729 * Allocate virtual memory for this request.
730 */
731
732 addr = virtual_space_start;
733 virtual_space_start += size;
734
735 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
736
737 /*
738 * Allocate and map physical pages to back new virtual pages.
739 */
740
741 for (vaddr = round_page(addr);
742 vaddr < addr + size;
743 vaddr += PAGE_SIZE) {
744
745 if (!pmap_next_page_hi(&phys_page))
746 panic("pmap_steal_memory");
747
748 /*
749 * XXX Logically, these mappings should be wired,
750 * but some pmap modules barf if they are.
751 */
752 #if defined(__LP64__)
753 pmap_pre_expand(kernel_pmap, vaddr);
754 #endif
755
756 pmap_enter(kernel_pmap, vaddr, phys_page,
757 VM_PROT_READ|VM_PROT_WRITE,
758 VM_WIMG_USE_DEFAULT, FALSE);
759 /*
760 * Account for newly stolen memory
761 */
762 vm_page_wire_count++;
763
764 }
765
766 return (void *) addr;
767 }
768
769 void
770 pmap_startup(
771 vm_offset_t *startp,
772 vm_offset_t *endp)
773 {
774 unsigned int i, npages, pages_initialized, fill, fillval;
775 ppnum_t phys_page;
776 addr64_t tmpaddr;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many pages can we manage, counting the space their vm_page_ts consume */
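	/*
	 * Each managed page therefore costs PAGE_SIZE bytes of memory plus
	 * one struct vm_page out of the stolen array; e.g. with 4 KB pages
	 * and a hypothetical 80-byte struct vm_page, roughly
	 * tmpaddr / 4176 page frames can be described.
	 */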
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795 if (pages_initialized == 0 || phys_page < vm_page_lowest)
796 vm_page_lowest = phys_page;
797
798 vm_page_init(&vm_pages[i], phys_page, FALSE);
799 vm_page_pages++;
800 pages_initialized++;
801 }
802 vm_pages_count = pages_initialized;
803
804 /*
805 * Check if we want to initialize pages to a known value
806 */
807 fill = 0; /* Assume no fill */
808 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
809
810 // -debug code remove
811 if (2 == vm_himemory_mode) {
812 // free low -> high so high is preferred
813 for (i = 1; i <= pages_initialized; i++) {
814 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
815 vm_page_release(&vm_pages[i - 1]);
816 }
817 }
818 else
819 // debug code remove-
820
821 /*
822 * Release pages in reverse order so that physical pages
823 * initially get allocated in ascending addresses. This keeps
824 * the devices (which must address physical memory) happy if
825 * they require several consecutive pages.
826 */
827 for (i = pages_initialized; i > 0; i--) {
828 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
829 vm_page_release(&vm_pages[i - 1]);
830 }
831
832 #if 0
833 {
834 vm_page_t xx, xxo, xxl;
835 int i, j, k, l;
836
837 j = 0; /* (BRINGUP) */
838 xxl = 0;
839
840 for( i = 0; i < vm_colors; i++ ) {
841 queue_iterate(&vm_page_queue_free[i],
842 xx,
843 vm_page_t,
844 pageq) { /* BRINGUP */
845 j++; /* (BRINGUP) */
846 if(j > vm_page_free_count) { /* (BRINGUP) */
847 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
848 }
849
850 l = vm_page_free_count - j; /* (BRINGUP) */
851 k = 0; /* (BRINGUP) */
852
853 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
854
855 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
856 k++;
857 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
858 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
859 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
860 }
861 }
862
863 xxl = xx;
864 }
865 }
866
867 if(j != vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
869 }
870 }
871 #endif
872
873
874 /*
875 * We have to re-align virtual_space_start,
876 * because pmap_steal_memory has been using it.
877 */
878
879 virtual_space_start = round_page(virtual_space_start);
880
881 *startp = virtual_space_start;
882 *endp = virtual_space_end;
883 }
884 #endif /* MACHINE_PAGES */
885
886 /*
887 * Routine: vm_page_module_init
888 * Purpose:
889 * Second initialization pass, to be done after
890 * the basic VM system is ready.
891 */
892 void
893 vm_page_module_init(void)
894 {
895 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
896 0, PAGE_SIZE, "vm pages");
897
898 #if ZONE_DEBUG
899 zone_debug_disable(vm_page_zone);
900 #endif /* ZONE_DEBUG */
901
902 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
903 zone_change(vm_page_zone, Z_EXPAND, FALSE);
904 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
905 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
906
907 /*
908 * Adjust zone statistics to account for the real pages allocated
909 * in vm_page_create(). [Q: is this really what we want?]
910 */
911 vm_page_zone->count += vm_page_pages;
912 vm_page_zone->sum_count += vm_page_pages;
913 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
914
915 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
916 }
917
918 /*
919 * Routine: vm_page_create
920 * Purpose:
921 * After the VM system is up, machine-dependent code
922 * may stumble across more physical memory, for example
923 * memory that it was reserving for a frame buffer.
924 * vm_page_create turns this memory into available pages.
925 */
926
927 void
928 vm_page_create(
929 ppnum_t start,
930 ppnum_t end)
931 {
932 ppnum_t phys_page;
933 vm_page_t m;
934
935 for (phys_page = start;
936 phys_page < end;
937 phys_page++) {
938 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
939 == VM_PAGE_NULL)
940 vm_page_more_fictitious();
941
942 m->fictitious = FALSE;
943 pmap_clear_noencrypt(phys_page);
944
945 vm_page_pages++;
946 vm_page_release(m);
947 }
948 }
949
950 /*
951 * vm_page_hash:
952 *
953 * Distributes the object/offset key pair among hash buckets.
954 *
955 * NOTE: The bucket count must be a power of 2
956 */
957 #define vm_page_hash(object, offset) (\
958 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
959 & vm_page_hash_mask)
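/*
 * Sketch of the distribution, with hypothetical values: if
 * vm_page_bucket_count were 1024 (vm_page_hash_mask == 0x3FF), the
 * bucket index for (object, offset) is
 *
 *   (((uintptr_t)object * vm_page_bucket_hash) +
 *    ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash)) & 0x3FF
 *
 * i.e. a multiplicative hash of the object pointer, perturbed by the
 * page index of the offset, reduced modulo the power-of-two bucket
 * count.
 */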
960
961
962 /*
963 * vm_page_insert: [ internal use only ]
964 *
965 * Inserts the given mem entry into the object/offset-page
966 * table and object list.
967 *
968 * The object must be locked.
969 */
970 void
971 vm_page_insert(
972 vm_page_t mem,
973 vm_object_t object,
974 vm_object_offset_t offset)
975 {
976 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
977 }
978
979 void
980 vm_page_insert_internal(
981 vm_page_t mem,
982 vm_object_t object,
983 vm_object_offset_t offset,
984 boolean_t queues_lock_held,
985 boolean_t insert_in_hash)
986 {
987 vm_page_bucket_t *bucket;
988 lck_spin_t *bucket_lock;
989 int hash_id;
990
991 XPR(XPR_VM_PAGE,
992 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
993 object, offset, mem, 0,0);
994
995 VM_PAGE_CHECK(mem);
996
997 if (object == vm_submap_object) {
998 /* the vm_submap_object is only a placeholder for submaps */
999 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1000 }
1001
1002 vm_object_lock_assert_exclusive(object);
1003 #if DEBUG
1004 lck_mtx_assert(&vm_page_queue_lock,
1005 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1006 : LCK_MTX_ASSERT_NOTOWNED);
1007 #endif /* DEBUG */
1008
1009 if (insert_in_hash == TRUE) {
1010 #if DEBUG
1011 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1012 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1013 "already in (obj=%p,off=0x%llx)",
1014 mem, object, offset, mem->object, mem->offset);
1015 #endif
1016 assert(!object->internal || offset < object->vo_size);
1017
1018 /* only insert "pageout" pages into "pageout" objects,
1019 * and normal pages into normal objects */
1020 assert(object->pageout == mem->pageout);
1021
1022 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1023
1024 /*
1025 * Record the object/offset pair in this page
1026 */
1027
1028 mem->object = object;
1029 mem->offset = offset;
1030
1031 /*
1032 * Insert it into the object/offset hash table
1033 */
1034 hash_id = vm_page_hash(object, offset);
1035 bucket = &vm_page_buckets[hash_id];
1036 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1037
1038 lck_spin_lock(bucket_lock);
1039
1040 mem->next = bucket->pages;
1041 bucket->pages = mem;
1042 #if MACH_PAGE_HASH_STATS
1043 if (++bucket->cur_count > bucket->hi_count)
1044 bucket->hi_count = bucket->cur_count;
1045 #endif /* MACH_PAGE_HASH_STATS */
1046
1047 lck_spin_unlock(bucket_lock);
1048 }
1049
1050 { unsigned int cache_attr;
1051
1052 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1053
1054 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1055 pmap_set_cache_attributes(mem->phys_page, cache_attr);
1056 object->set_cache_attr = TRUE;
1057 }
1058 }
1059 /*
1060 * Now link into the object's list of backed pages.
1061 */
1062
1063 VM_PAGE_INSERT(mem, object);
1064 mem->tabled = TRUE;
1065
1066 /*
1067 * Show that the object has one more resident page.
1068 */
1069
1070 object->resident_page_count++;
1071 if (VM_PAGE_WIRED(mem)) {
1072 object->wired_page_count++;
1073 }
1074 assert(object->resident_page_count >= object->wired_page_count);
1075
1076 assert(!mem->reusable);
1077
1078 if (object->purgable == VM_PURGABLE_VOLATILE) {
1079 if (VM_PAGE_WIRED(mem)) {
1080 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1081 } else {
1082 OSAddAtomic(1, &vm_page_purgeable_count);
1083 }
1084 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1085 mem->throttled) {
1086 /*
1087 * This page belongs to a purged VM object but hasn't
1088 * been purged (because it was "busy").
1089 * It's in the "throttled" queue and hence not
1090 * visible to vm_pageout_scan(). Move it to a pageable
1091 * queue, so that it can eventually be reclaimed, instead
1092 * of lingering in the "empty" object.
1093 */
1094 if (queues_lock_held == FALSE)
1095 vm_page_lockspin_queues();
1096 vm_page_deactivate(mem);
1097 if (queues_lock_held == FALSE)
1098 vm_page_unlock_queues();
1099 }
1100 }
1101
1102 /*
1103 * vm_page_replace:
1104 *
1105 * Exactly like vm_page_insert, except that we first
1106 * remove any existing page at the given offset in object.
1107 *
1108 * The object must be locked.
1109 */
1110 void
1111 vm_page_replace(
1112 register vm_page_t mem,
1113 register vm_object_t object,
1114 register vm_object_offset_t offset)
1115 {
1116 vm_page_bucket_t *bucket;
1117 vm_page_t found_m = VM_PAGE_NULL;
1118 lck_spin_t *bucket_lock;
1119 int hash_id;
1120
1121 VM_PAGE_CHECK(mem);
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1125 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1126 "already in (obj=%p,off=0x%llx)",
1127 mem, object, offset, mem->object, mem->offset);
1128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1129 #endif
1130 /*
1131 * Record the object/offset pair in this page
1132 */
1133
1134 mem->object = object;
1135 mem->offset = offset;
1136
1137 /*
1138 * Insert it into the object/offset hash table,
1139 * replacing any page that might have been there.
1140 */
1141
1142 hash_id = vm_page_hash(object, offset);
1143 bucket = &vm_page_buckets[hash_id];
1144 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1145
1146 lck_spin_lock(bucket_lock);
1147
1148 if (bucket->pages) {
1149 vm_page_t *mp = &bucket->pages;
1150 vm_page_t m = *mp;
1151
1152 do {
1153 if (m->object == object && m->offset == offset) {
1154 /*
1155 * Remove old page from hash list
1156 */
1157 *mp = m->next;
1158
1159 found_m = m;
1160 break;
1161 }
1162 mp = &m->next;
1163 } while ((m = *mp));
1164
1165 mem->next = bucket->pages;
1166 } else {
1167 mem->next = VM_PAGE_NULL;
1168 }
1169 /*
1170 * insert new page at head of hash list
1171 */
1172 bucket->pages = mem;
1173
1174 lck_spin_unlock(bucket_lock);
1175
1176 if (found_m) {
1177 /*
1178 * there was already a page at the specified
1179 * offset for this object... remove it from
1180 * the object and free it back to the free list
1181 */
1182 vm_page_free_unlocked(found_m, FALSE);
1183 }
1184 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1185 }
1186
1187 /*
1188 * vm_page_remove: [ internal use only ]
1189 *
1190 * Removes the given mem entry from the object/offset-page
1191 * table and the object page list.
1192 *
1193 * The object must be locked.
1194 */
1195
1196 void
1197 vm_page_remove(
1198 vm_page_t mem,
1199 boolean_t remove_from_hash)
1200 {
1201 vm_page_bucket_t *bucket;
1202 vm_page_t this;
1203 lck_spin_t *bucket_lock;
1204 int hash_id;
1205
1206 XPR(XPR_VM_PAGE,
1207 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1208 mem->object, mem->offset,
1209 mem, 0,0);
1210
1211 vm_object_lock_assert_exclusive(mem->object);
1212 assert(mem->tabled);
1213 assert(!mem->cleaning);
1214 VM_PAGE_CHECK(mem);
1215
1216 if (remove_from_hash == TRUE) {
1217 /*
1218 * Remove from the object/offset hash table
1219 */
1220 hash_id = vm_page_hash(mem->object, mem->offset);
1221 bucket = &vm_page_buckets[hash_id];
1222 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1223
1224 lck_spin_lock(bucket_lock);
1225
1226 if ((this = bucket->pages) == mem) {
1227 /* optimize for common case */
1228
1229 bucket->pages = mem->next;
1230 } else {
1231 vm_page_t *prev;
1232
1233 for (prev = &this->next;
1234 (this = *prev) != mem;
1235 prev = &this->next)
1236 continue;
1237 *prev = this->next;
1238 }
1239 #if MACH_PAGE_HASH_STATS
1240 bucket->cur_count--;
1241 #endif /* MACH_PAGE_HASH_STATS */
1242
1243 lck_spin_unlock(bucket_lock);
1244 }
1245 /*
1246 * Now remove from the object's list of backed pages.
1247 */
1248
1249 VM_PAGE_REMOVE(mem);
1250
1251 /*
1252 * And show that the object has one fewer resident
1253 * page.
1254 */
1255
1256 assert(mem->object->resident_page_count > 0);
1257 mem->object->resident_page_count--;
1258
1259 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1260 if (mem->object->resident_page_count == 0)
1261 vm_object_cache_remove(mem->object);
1262 }
1263
1264 if (VM_PAGE_WIRED(mem)) {
1265 assert(mem->object->wired_page_count > 0);
1266 mem->object->wired_page_count--;
1267 }
1268 assert(mem->object->resident_page_count >=
1269 mem->object->wired_page_count);
1270 if (mem->reusable) {
1271 assert(mem->object->reusable_page_count > 0);
1272 mem->object->reusable_page_count--;
1273 assert(mem->object->reusable_page_count <=
1274 mem->object->resident_page_count);
1275 mem->reusable = FALSE;
1276 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1277 vm_page_stats_reusable.reused_remove++;
1278 } else if (mem->object->all_reusable) {
1279 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1280 vm_page_stats_reusable.reused_remove++;
1281 }
1282
1283 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1284 if (VM_PAGE_WIRED(mem)) {
1285 assert(vm_page_purgeable_wired_count > 0);
1286 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1287 } else {
1288 assert(vm_page_purgeable_count > 0);
1289 OSAddAtomic(-1, &vm_page_purgeable_count);
1290 }
1291 }
1292 if (mem->object->set_cache_attr == TRUE)
1293 pmap_set_cache_attributes(mem->phys_page, 0);
1294
1295 mem->tabled = FALSE;
1296 mem->object = VM_OBJECT_NULL;
1297 mem->offset = (vm_object_offset_t) -1;
1298 }
1299
1300
1301 /*
1302 * vm_page_lookup:
1303 *
1304 * Returns the page associated with the object/offset
1305 * pair specified; if none is found, VM_PAGE_NULL is returned.
1306 *
1307 * The object must be locked. No side effects.
1308 */
1309
1310 unsigned long vm_page_lookup_hint = 0;
1311 unsigned long vm_page_lookup_hint_next = 0;
1312 unsigned long vm_page_lookup_hint_prev = 0;
1313 unsigned long vm_page_lookup_hint_miss = 0;
1314 unsigned long vm_page_lookup_bucket_NULL = 0;
1315 unsigned long vm_page_lookup_miss = 0;
1316
1317
1318 vm_page_t
1319 vm_page_lookup(
1320 vm_object_t object,
1321 vm_object_offset_t offset)
1322 {
1323 vm_page_t mem;
1324 vm_page_bucket_t *bucket;
1325 queue_entry_t qe;
1326 lck_spin_t *bucket_lock;
1327 int hash_id;
1328
1329 vm_object_lock_assert_held(object);
1330 mem = object->memq_hint;
1331
1332 if (mem != VM_PAGE_NULL) {
1333 assert(mem->object == object);
1334
1335 if (mem->offset == offset) {
1336 vm_page_lookup_hint++;
1337 return mem;
1338 }
1339 qe = queue_next(&mem->listq);
1340
1341 if (! queue_end(&object->memq, qe)) {
1342 vm_page_t next_page;
1343
1344 next_page = (vm_page_t) qe;
1345 assert(next_page->object == object);
1346
1347 if (next_page->offset == offset) {
1348 vm_page_lookup_hint_next++;
1349 object->memq_hint = next_page; /* new hint */
1350 return next_page;
1351 }
1352 }
1353 qe = queue_prev(&mem->listq);
1354
1355 if (! queue_end(&object->memq, qe)) {
1356 vm_page_t prev_page;
1357
1358 prev_page = (vm_page_t) qe;
1359 assert(prev_page->object == object);
1360
1361 if (prev_page->offset == offset) {
1362 vm_page_lookup_hint_prev++;
1363 object->memq_hint = prev_page; /* new hint */
1364 return prev_page;
1365 }
1366 }
1367 }
1368 /*
1369 * Search the hash table for this object/offset pair
1370 */
1371 hash_id = vm_page_hash(object, offset);
1372 bucket = &vm_page_buckets[hash_id];
1373
1374 /*
1375 * since we hold the object lock, we are guaranteed that no
1376 * new pages can be inserted into this object... this in turn
1377 * guarantees that the page we're looking for can't exist
1378 * if the bucket it hashes to is currently NULL even when looked
1379 * at outside the scope of the hash bucket lock... this is a
1380 * really cheap optimization to avoid taking the lock
1381 */
1382 if (bucket->pages == VM_PAGE_NULL) {
1383 vm_page_lookup_bucket_NULL++;
1384
1385 return (VM_PAGE_NULL);
1386 }
1387 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1388
1389 lck_spin_lock(bucket_lock);
1390
1391 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1392 VM_PAGE_CHECK(mem);
1393 if ((mem->object == object) && (mem->offset == offset))
1394 break;
1395 }
1396 lck_spin_unlock(bucket_lock);
1397
1398 if (mem != VM_PAGE_NULL) {
1399 if (object->memq_hint != VM_PAGE_NULL) {
1400 vm_page_lookup_hint_miss++;
1401 }
1402 assert(mem->object == object);
1403 object->memq_hint = mem;
1404 } else
1405 vm_page_lookup_miss++;
1406
1407 return(mem);
1408 }
1409
1410
1411 /*
1412 * vm_page_rename:
1413 *
1414 * Move the given memory entry from its
1415 * current object to the specified target object/offset.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_rename(
1421 register vm_page_t mem,
1422 register vm_object_t new_object,
1423 vm_object_offset_t new_offset,
1424 boolean_t encrypted_ok)
1425 {
1426 assert(mem->object != new_object);
1427
1428 /*
1429 * ENCRYPTED SWAP:
1430 * The encryption key is based on the page's memory object
1431 * (aka "pager") and paging offset. Moving the page to
1432 * another VM object changes its "pager" and "paging_offset"
1433 * so it has to be decrypted first, or we would lose the key.
1434 *
1435 * One exception is VM object collapsing, where we transfer pages
1436 * from one backing object to its parent object. This operation also
1437 * transfers the paging information, so the <pager,paging_offset> info
1438 * should remain consistent. The caller (vm_object_do_collapse())
1439 * sets "encrypted_ok" in this case.
1440 */
1441 if (!encrypted_ok && mem->encrypted) {
1442 panic("vm_page_rename: page %p is encrypted\n", mem);
1443 }
1444
1445 XPR(XPR_VM_PAGE,
1446 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1447 new_object, new_offset,
1448 mem, 0,0);
1449
1450 /*
1451 * Changes to mem->object require the page lock because
1452 * the pageout daemon uses that lock to get the object.
1453 */
1454 vm_page_lockspin_queues();
1455
1456 vm_page_remove(mem, TRUE);
1457 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1458
1459 vm_page_unlock_queues();
1460 }
1461
1462 /*
1463 * vm_page_init:
1464 *
1465 * Initialize the fields in a new page.
1466 * This takes a structure with random values and initializes it
1467 * so that it can be given to vm_page_release or vm_page_insert.
1468 */
1469 void
1470 vm_page_init(
1471 vm_page_t mem,
1472 ppnum_t phys_page,
1473 boolean_t lopage)
1474 {
1475 assert(phys_page);
1476
1477 #if DEBUG
1478 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1479 if (!(pmap_valid_page(phys_page))) {
1480 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1481 }
1482 }
1483 #endif
1484 *mem = vm_page_template;
1485 mem->phys_page = phys_page;
1486 #if 0
1487 /*
1488 * we're leaving this turned off for now... currently pages
1489 * come off the free list and are either immediately dirtied/referenced
1490 * due to zero-fill or COW faults, or are used to read or write files...
1491 * in the file I/O case, the UPL mechanism takes care of clearing
1492 * the state of the HW ref/mod bits in a somewhat fragile way.
1493 * Since we may change the way this works in the future (to toughen it up),
1494 * I'm leaving this as a reminder of where these bits could get cleared
1495 */
1496
1497 /*
1498 * make sure both the h/w referenced and modified bits are
1499 * clear at this point... we are especially dependent on
1500 * not finding a 'stale' h/w modified in a number of spots
1501 * once this page goes back into use
1502 */
1503 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1504 #endif
1505 mem->lopage = lopage;
1506 }
1507
1508 /*
1509 * vm_page_grab_fictitious:
1510 *
1511 * Remove a fictitious page from the free list.
1512 * Returns VM_PAGE_NULL if there are no free pages.
1513 */
1514 int c_vm_page_grab_fictitious = 0;
1515 int c_vm_page_grab_fictitious_failed = 0;
1516 int c_vm_page_release_fictitious = 0;
1517 int c_vm_page_more_fictitious = 0;
1518
1519 vm_page_t
1520 vm_page_grab_fictitious_common(
1521 ppnum_t phys_addr)
1522 {
1523 vm_page_t m;
1524
1525 if ((m = (vm_page_t)zget(vm_page_zone))) {
1526
1527 vm_page_init(m, phys_addr, FALSE);
1528 m->fictitious = TRUE;
1529
1530 c_vm_page_grab_fictitious++;
1531 } else
1532 c_vm_page_grab_fictitious_failed++;
1533
1534 return m;
1535 }
1536
1537 vm_page_t
1538 vm_page_grab_fictitious(void)
1539 {
1540 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1541 }
1542
1543 vm_page_t
1544 vm_page_grab_guard(void)
1545 {
1546 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1547 }
1548
1549
1550 /*
1551 * vm_page_release_fictitious:
1552 *
1553 * Release a fictitious page to the zone pool
1554 */
1555 void
1556 vm_page_release_fictitious(
1557 vm_page_t m)
1558 {
1559 assert(!m->free);
1560 assert(m->fictitious);
1561 assert(m->phys_page == vm_page_fictitious_addr ||
1562 m->phys_page == vm_page_guard_addr);
1563
1564 c_vm_page_release_fictitious++;
1565
1566 zfree(vm_page_zone, m);
1567 }
1568
1569 /*
1570 * vm_page_more_fictitious:
1571 *
1572 * Add more fictitious pages to the zone.
1573 * Allowed to block. This routine is way intimate
1574 * with the zones code, for several reasons:
1575 * 1. we need to carve some page structures out of physical
1576 * memory before zones work, so they _cannot_ come from
1577 * the zone_map.
1578 * 2. the zone needs to be collectable in order to prevent
1579 * growth without bound. These structures are used by
1580 * the device pager (by the hundreds and thousands), as
1581 * private pages for pageout, and as blocking pages for
1582 * pagein. Temporary bursts in demand should not result in
1583 * permanent allocation of a resource.
1584 * 3. To smooth allocation humps, we allocate single pages
1585 * with kernel_memory_allocate(), and cram them into the
1586 * zone.
1587 */
1588
1589 void vm_page_more_fictitious(void)
1590 {
1591 vm_offset_t addr;
1592 kern_return_t retval;
1593
1594 c_vm_page_more_fictitious++;
1595
1596 /*
1597 * Allocate a single page from the zone_map. Do not wait if no physical
1598 * pages are immediately available, and do not zero the space. We need
1599 * our own blocking lock here to prevent having multiple,
1600 * simultaneous requests from piling up on the zone_map lock. Exactly
1601 * one (of our) threads should be potentially waiting on the map lock.
1602 * If winner is not vm-privileged, then the page allocation will fail,
1603 * and it will temporarily block here in the vm_page_wait().
1604 */
1605 lck_mtx_lock(&vm_page_alloc_lock);
1606 /*
1607 * If another thread allocated space, just bail out now.
1608 */
1609 if (zone_free_count(vm_page_zone) > 5) {
1610 /*
1611 * The number "5" is a small number that is larger than the
1612 * number of fictitious pages that any single caller will
1613 * attempt to allocate. Otherwise, a thread will attempt to
1614 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1615 * release all of the resources and locks already acquired,
1616 * and then call this routine. This routine finds the pages
1617 * that the caller released, so fails to allocate new space.
1618 * The process repeats infinitely. The largest known number
1619 * of fictitious pages required in this manner is 2. 5 is
1620 * simply a somewhat larger number.
1621 */
1622 lck_mtx_unlock(&vm_page_alloc_lock);
1623 return;
1624 }
1625
1626 retval = kernel_memory_allocate(zone_map,
1627 &addr, PAGE_SIZE, VM_PROT_ALL,
1628 KMA_KOBJECT|KMA_NOPAGEWAIT);
1629 if (retval != KERN_SUCCESS) {
1630 /*
1631 * No page was available. Drop the
1632 * lock to give another thread a chance at it, and
1633 * wait for the pageout daemon to make progress.
1634 */
1635 lck_mtx_unlock(&vm_page_alloc_lock);
1636 vm_page_wait(THREAD_UNINT);
1637 return;
1638 }
1639 zcram(vm_page_zone, addr, PAGE_SIZE);
1640
1641 lck_mtx_unlock(&vm_page_alloc_lock);
1642 }
1643
1644
1645 /*
1646 * vm_pool_low():
1647 *
1648 * Return true if it is not likely that a non-vm_privileged thread
1649 * can get memory without blocking. Advisory only, since the
1650 * situation may change under us.
1651 */
1652 int
1653 vm_pool_low(void)
1654 {
1655 /* No locking, at worst we will fib. */
1656 return( vm_page_free_count <= vm_page_free_reserved );
1657 }
1658
1659
1660
1661 /*
1662 * this is an interface to support bring-up of drivers
1663 * on platforms with physical memory > 4G...
1664 */
1665 int vm_himemory_mode = 0;
1666
1667
1668 /*
1669 * this interface exists to support hardware controllers
1670 * incapable of generating DMAs with more than 32 bits
1671 * of address on platforms with physical memory > 4G...
1672 */
1673 unsigned int vm_lopages_allocated_q = 0;
1674 unsigned int vm_lopages_allocated_cpm_success = 0;
1675 unsigned int vm_lopages_allocated_cpm_failed = 0;
1676 queue_head_t vm_lopage_queue_free;
1677
1678 vm_page_t
1679 vm_page_grablo(void)
1680 {
1681 vm_page_t mem;
1682
1683 if (vm_lopage_needed == FALSE)
1684 return (vm_page_grab());
1685
1686 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1687
1688 if ( !queue_empty(&vm_lopage_queue_free)) {
1689 queue_remove_first(&vm_lopage_queue_free,
1690 mem,
1691 vm_page_t,
1692 pageq);
1693 assert(vm_lopage_free_count);
1694
1695 vm_lopage_free_count--;
1696 vm_lopages_allocated_q++;
1697
1698 if (vm_lopage_free_count < vm_lopage_lowater)
1699 vm_lopage_refill = TRUE;
1700
1701 lck_mtx_unlock(&vm_page_queue_free_lock);
1702 } else {
1703 lck_mtx_unlock(&vm_page_queue_free_lock);
1704
1705 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1706
1707 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1708 vm_lopages_allocated_cpm_failed++;
1709 lck_mtx_unlock(&vm_page_queue_free_lock);
1710
1711 return (VM_PAGE_NULL);
1712 }
1713 mem->busy = TRUE;
1714
1715 vm_page_lockspin_queues();
1716
1717 mem->gobbled = FALSE;
1718 vm_page_gobble_count--;
1719 vm_page_wire_count--;
1720
1721 vm_lopages_allocated_cpm_success++;
1722 vm_page_unlock_queues();
1723 }
1724 assert(mem->busy);
1725 assert(!mem->free);
1726 assert(!mem->pmapped);
1727 assert(!mem->wpmapped);
1728 assert(!pmap_is_noencrypt(mem->phys_page));
1729
1730 mem->pageq.next = NULL;
1731 mem->pageq.prev = NULL;
1732
1733 return (mem);
1734 }
1735
1736
1737 /*
1738 * vm_page_grab:
1739 *
1740 * first try to grab a page from the per-cpu free list...
1741 * this must be done while pre-emption is disabled... if
1742 * a page is available, we're done...
1743 * if no page is available, grab the vm_page_queue_free_lock
1744 * and see if current number of free pages would allow us
1745 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1746 * if there are pages available, disable preemption and
1747 * recheck the state of the per-cpu free list... we could
1748 * have been preempted and moved to a different cpu, or
1749 * some other thread could have re-filled it... if still
1750 * empty, figure out how many pages we can steal from the
1751 * global free queue and move to the per-cpu queue...
1752 * return 1 of these pages when done... only wakeup the
1753 * pageout_scan thread if we moved pages from the global
1754 * list... no need for the wakeup if we've satisfied the
1755 * request from the per-cpu queue.
1756 */
1757
1758 #define COLOR_GROUPS_TO_STEAL 4
1759
1760
1761 vm_page_t
1762 vm_page_grab( void )
1763 {
1764 vm_page_t mem;
1765
1766
1767 disable_preemption();
1768
1769 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1770 return_page_from_cpu_list:
1771 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1772 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1773 mem->pageq.next = NULL;
1774
1775 enable_preemption();
1776
1777 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1778 assert(mem->tabled == FALSE);
1779 assert(mem->object == VM_OBJECT_NULL);
1780 assert(!mem->laundry);
1781 assert(!mem->free);
1782 assert(pmap_verify_free(mem->phys_page));
1783 assert(mem->busy);
1784 assert(!mem->encrypted);
1785 assert(!mem->pmapped);
1786 assert(!mem->wpmapped);
1787 assert(!mem->active);
1788 assert(!mem->inactive);
1789 assert(!mem->throttled);
1790 assert(!mem->speculative);
1791 assert(!pmap_is_noencrypt(mem->phys_page));
1792
1793 return mem;
1794 }
1795 enable_preemption();
1796
1797
1798 /*
1799 * Optionally produce warnings if the wire or gobble
1800 * counts exceed some threshold.
1801 */
1802 if (vm_page_wire_count_warning > 0
1803 && vm_page_wire_count >= vm_page_wire_count_warning) {
1804 printf("mk: vm_page_grab(): high wired page count of %d\n",
1805 vm_page_wire_count);
1806 assert(vm_page_wire_count < vm_page_wire_count_warning);
1807 }
1808 if (vm_page_gobble_count_warning > 0
1809 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1810 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1811 vm_page_gobble_count);
1812 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1813 }
1814
1815 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1816
1817 /*
1818 * Only let privileged threads (involved in pageout)
1819 * dip into the reserved pool.
1820 */
1821 if ((vm_page_free_count < vm_page_free_reserved) &&
1822 !(current_thread()->options & TH_OPT_VMPRIV)) {
1823 lck_mtx_unlock(&vm_page_queue_free_lock);
1824 mem = VM_PAGE_NULL;
1825 }
1826 else {
1827 vm_page_t head;
1828 vm_page_t tail;
1829 unsigned int pages_to_steal;
1830 unsigned int color;
1831
1832 while ( vm_page_free_count == 0 ) {
1833
1834 lck_mtx_unlock(&vm_page_queue_free_lock);
1835 /*
1836 * must be a privileged thread to be
1837 * in this state since a non-privileged
1838 * thread would have bailed if we were
1839 * under the vm_page_free_reserved mark
1840 */
1841 VM_PAGE_WAIT();
1842 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1843 }
1844
1845 disable_preemption();
1846
1847 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1848 lck_mtx_unlock(&vm_page_queue_free_lock);
1849
1850 /*
1851 * we got preempted and moved to another processor
1852 * or we got preempted and someone else ran and filled the cache
1853 */
1854 goto return_page_from_cpu_list;
1855 }
1856 if (vm_page_free_count <= vm_page_free_reserved)
1857 pages_to_steal = 1;
1858 else {
1859 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1860
1861 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1862 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1863 }
1864 color = PROCESSOR_DATA(current_processor(), start_color);
1865 head = tail = NULL;
1866
1867 while (pages_to_steal--) {
1868 if (--vm_page_free_count < vm_page_free_count_minimum)
1869 vm_page_free_count_minimum = vm_page_free_count;
1870
1871 while (queue_empty(&vm_page_queue_free[color]))
1872 color = (color + 1) & vm_color_mask;
1873
1874 queue_remove_first(&vm_page_queue_free[color],
1875 mem,
1876 vm_page_t,
1877 pageq);
1878 mem->pageq.next = NULL;
1879 mem->pageq.prev = NULL;
1880
1881 assert(!mem->active);
1882 assert(!mem->inactive);
1883 assert(!mem->throttled);
1884 assert(!mem->speculative);
1885
1886 color = (color + 1) & vm_color_mask;
1887
1888 if (head == NULL)
1889 head = mem;
1890 else
1891 tail->pageq.next = (queue_t)mem;
1892 tail = mem;
1893
1894 mem->pageq.prev = NULL;
1895 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1896 assert(mem->tabled == FALSE);
1897 assert(mem->object == VM_OBJECT_NULL);
1898 assert(!mem->laundry);
1899 assert(mem->free);
1900 mem->free = FALSE;
1901
1902 assert(pmap_verify_free(mem->phys_page));
1903 assert(mem->busy);
1904 assert(!mem->free);
1905 assert(!mem->encrypted);
1906 assert(!mem->pmapped);
1907 assert(!mem->wpmapped);
1908 assert(!pmap_is_noencrypt(mem->phys_page));
1909 }
1910 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1911 PROCESSOR_DATA(current_processor(), start_color) = color;
1912
1913 /*
1914 * satisfy this request
1915 */
1916 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1917 mem = head;
1918 mem->pageq.next = NULL;
1919
1920 lck_mtx_unlock(&vm_page_queue_free_lock);
1921
1922 enable_preemption();
1923 }
1924 /*
1925 * Decide if we should poke the pageout daemon.
1926 * We do this if the free count is less than the low
1927 * water mark, or if the free count is less than the high
1928 * water mark (but above the low water mark) and the inactive
1929 * count is less than its target.
1930 *
1931 * We don't have the counts locked ... if they change a little,
1932 * it doesn't really matter.
1933 */
1934 if ((vm_page_free_count < vm_page_free_min) ||
1935 ((vm_page_free_count < vm_page_free_target) &&
1936 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1937 thread_wakeup((event_t) &vm_page_free_wanted);
1938
1939 VM_CHECK_MEMORYSTATUS;
1940
1941 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1942
1943 return mem;
1944 }
1945
1946 /*
1947 * vm_page_release:
1948 *
1949 * Return a page to the free list.
1950 */
1951
1952 void
1953 vm_page_release(
1954 register vm_page_t mem)
1955 {
1956 unsigned int color;
1957 int need_wakeup = 0;
1958 int need_priv_wakeup = 0;
1959
1960
1961 assert(!mem->private && !mem->fictitious);
1962 if (vm_page_free_verify) {
1963 assert(pmap_verify_free(mem->phys_page));
1964 }
1965 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1966
1967
1968 pmap_clear_noencrypt(mem->phys_page);
1969
1970 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1971 #if DEBUG
1972 if (mem->free)
1973 panic("vm_page_release");
1974 #endif
1975
1976 assert(mem->busy);
1977 assert(!mem->laundry);
1978 assert(mem->object == VM_OBJECT_NULL);
1979 assert(mem->pageq.next == NULL &&
1980 mem->pageq.prev == NULL);
1981 assert(mem->listq.next == NULL &&
1982 mem->listq.prev == NULL);
1983
1984 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
1985 vm_lopage_free_count < vm_lopage_free_limit &&
1986 mem->phys_page < max_valid_low_ppnum) {
1987 /*
1988 * this exists to support hardware controllers
1989 * incapable of generating DMAs with more than 32 bits
1990 * of address on platforms with physical memory > 4G...
1991 */
1992 queue_enter_first(&vm_lopage_queue_free,
1993 mem,
1994 vm_page_t,
1995 pageq);
1996 vm_lopage_free_count++;
1997
1998 if (vm_lopage_free_count >= vm_lopage_free_limit)
1999 vm_lopage_refill = FALSE;
2000
2001 mem->lopage = TRUE;
2002 } else {
2003 mem->lopage = FALSE;
2004 mem->free = TRUE;
2005
2006 color = mem->phys_page & vm_color_mask;
2007 queue_enter_first(&vm_page_queue_free[color],
2008 mem,
2009 vm_page_t,
2010 pageq);
2011 vm_page_free_count++;
2012 /*
2013 * Check if we should wake up someone waiting for a page.
2014 * But don't bother waking them unless they can allocate.
2015 *
2016 * We wake up only one thread, to prevent starvation.
2017 * Because the scheduling system handles wait queues FIFO,
2018 * if we wake up all waiting threads, one greedy thread
2019 * can starve multiple well-behaved threads. When the threads
2020 * all wake up, the greedy thread runs first, grabs the page,
2021 * and waits for another page. It will be the first to run
2022 * when the next page is freed.
2023 *
2024 * However, there is a slight danger here.
2025 * The thread we wake might not use the free page.
2026 * Then the other threads could wait indefinitely
2027 * while the page goes unused. To forestall this,
2028 * the pageout daemon will keep making free pages
2029 * as long as vm_page_free_wanted is non-zero.
2030 */
2031
2032 assert(vm_page_free_count > 0);
2033 if (vm_page_free_wanted_privileged > 0) {
2034 vm_page_free_wanted_privileged--;
2035 need_priv_wakeup = 1;
2036 } else if (vm_page_free_wanted > 0 &&
2037 vm_page_free_count > vm_page_free_reserved) {
2038 vm_page_free_wanted--;
2039 need_wakeup = 1;
2040 }
2041 }
2042 lck_mtx_unlock(&vm_page_queue_free_lock);
2043
2044 if (need_priv_wakeup)
2045 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2046 else if (need_wakeup)
2047 thread_wakeup_one((event_t) &vm_page_free_count);
2048
2049 VM_CHECK_MEMORYSTATUS;
2050 }
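/*
 * Editor's note (illustrative, not part of the original source): the free
 * queues above are indexed by "color", computed as
 * mem->phys_page & vm_color_mask.  Assuming, for example, vm_colors == 8
 * (so vm_color_mask == 7), physical page numbers 0x1000 through 0x1007
 * would land on colors 0 through 7, spreading physically adjacent pages
 * across different free queues.
 */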
2051
2052 /*
2053 * vm_page_wait:
2054 *
2055 * Wait for a page to become available.
2056 * If there are plenty of free pages, then we don't sleep.
2057 *
2058 * Returns:
2059 * TRUE: There may be another page, try again
2060 * FALSE: We were interrupted out of our wait, don't try again
2061 */
2062
2063 boolean_t
2064 vm_page_wait(
2065 int interruptible )
2066 {
2067 /*
2068 * We can't use vm_page_free_reserved to make this
2069 * determination. Consider: some thread might
2070 * need to allocate two pages. The first allocation
2071 * succeeds, the second fails. After the first page is freed,
2072 * a call to vm_page_wait must really block.
2073 */
2074 kern_return_t wait_result;
2075 int need_wakeup = 0;
2076 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2077
2078 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2079
2080 if (is_privileged && vm_page_free_count) {
2081 lck_mtx_unlock(&vm_page_queue_free_lock);
2082 return TRUE;
2083 }
2084 if (vm_page_free_count < vm_page_free_target) {
2085
2086 if (is_privileged) {
2087 if (vm_page_free_wanted_privileged++ == 0)
2088 need_wakeup = 1;
2089 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2090 } else {
2091 if (vm_page_free_wanted++ == 0)
2092 need_wakeup = 1;
2093 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2094 }
2095 lck_mtx_unlock(&vm_page_queue_free_lock);
2096 counter(c_vm_page_wait_block++);
2097
2098 if (need_wakeup)
2099 thread_wakeup((event_t)&vm_page_free_wanted);
2100
2101 if (wait_result == THREAD_WAITING)
2102 wait_result = thread_block(THREAD_CONTINUE_NULL);
2103
2104 return(wait_result == THREAD_AWAKENED);
2105 } else {
2106 lck_mtx_unlock(&vm_page_queue_free_lock);
2107 return TRUE;
2108 }
2109 }
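/*
 * Usage sketch (editor's illustration, not part of the original source):
 * vm_page_grab() and vm_page_wait() are typically paired in a retry loop,
 * as vm_page_part_zero_fill() does later in this file:
 *
 *	vm_page_t m;
 *
 *	while (1) {
 *		m = vm_page_grab();
 *		if (m == VM_PAGE_NULL) {
 *			vm_page_wait(THREAD_UNINT);
 *			continue;
 *		}
 *		break;
 *	}
 */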
2110
2111 /*
2112 * vm_page_alloc:
2113 *
2114 * Allocate and return a memory cell associated
2115 * with this VM object/offset pair.
2116 *
2117 * Object must be locked.
2118 */
2119
2120 vm_page_t
2121 vm_page_alloc(
2122 vm_object_t object,
2123 vm_object_offset_t offset)
2124 {
2125 register vm_page_t mem;
2126
2127 vm_object_lock_assert_exclusive(object);
2128 mem = vm_page_grab();
2129 if (mem == VM_PAGE_NULL)
2130 return VM_PAGE_NULL;
2131
2132 vm_page_insert(mem, object, offset);
2133
2134 return(mem);
2135 }
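/*
 * Usage sketch (editor's illustration, not part of the original source;
 * "object", "offset" and "m" are placeholder variables): the caller holds
 * the object lock exclusively and handles failure itself, for example by
 * waiting for a free page and retrying:
 *
 *	vm_object_lock(object);
 *	m = vm_page_alloc(object, offset);
 *	if (m == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *		... retry the lookup/allocation ...
 *	}
 */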
2136
2137 vm_page_t
2138 vm_page_alloclo(
2139 vm_object_t object,
2140 vm_object_offset_t offset)
2141 {
2142 register vm_page_t mem;
2143
2144 vm_object_lock_assert_exclusive(object);
2145 mem = vm_page_grablo();
2146 if (mem == VM_PAGE_NULL)
2147 return VM_PAGE_NULL;
2148
2149 vm_page_insert(mem, object, offset);
2150
2151 return(mem);
2152 }
2153
2154
2155 /*
2156 * vm_page_alloc_guard:
2157 *
2158 * Allocate a fictitious page which will be used
2159 * as a guard page. The page will be inserted into
2160 * the object and returned to the caller.
2161 */
2162
2163 vm_page_t
2164 vm_page_alloc_guard(
2165 vm_object_t object,
2166 vm_object_offset_t offset)
2167 {
2168 register vm_page_t mem;
2169
2170 vm_object_lock_assert_exclusive(object);
2171 mem = vm_page_grab_guard();
2172 if (mem == VM_PAGE_NULL)
2173 return VM_PAGE_NULL;
2174
2175 vm_page_insert(mem, object, offset);
2176
2177 return(mem);
2178 }
2179
2180
2181 counter(unsigned int c_laundry_pages_freed = 0;)
2182
2183 /*
2184 * vm_page_free_prepare:
2185 *
2186 * Removes page from any queue it may be on
2187 * and disassociates it from its VM object.
2188 *
2189 * Object and page queues must be locked prior to entry.
2190 */
2191 static void
2192 vm_page_free_prepare(
2193 vm_page_t mem)
2194 {
2195 vm_page_free_prepare_queues(mem);
2196 vm_page_free_prepare_object(mem, TRUE);
2197 }
2198
2199
2200 void
2201 vm_page_free_prepare_queues(
2202 vm_page_t mem)
2203 {
2204 VM_PAGE_CHECK(mem);
2205 assert(!mem->free);
2206 assert(!mem->cleaning);
2207 assert(!mem->pageout);
2208 #if DEBUG
2209 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2210 if (mem->free)
2211 panic("vm_page_free: freeing page on free list\n");
2212 #endif
2213 if (mem->object) {
2214 vm_object_lock_assert_exclusive(mem->object);
2215 }
2216
2217 if (mem->laundry) {
2218 /*
2219 * We may have to free a page while it's being laundered
2220 * if we lost its pager (due to a forced unmount, for example).
2221 * We need to call vm_pageout_throttle_up() before removing
2222 * the page from its VM object, so that we can find out on
2223 * which pageout queue the page is on.
2224 */
2225 vm_pageout_throttle_up(mem);
2226 counter(++c_laundry_pages_freed);
2227 }
2228 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2229
2230 if (VM_PAGE_WIRED(mem)) {
2231 if (mem->object) {
2232 assert(mem->object->wired_page_count > 0);
2233 mem->object->wired_page_count--;
2234 assert(mem->object->resident_page_count >=
2235 mem->object->wired_page_count);
2236
2237 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2238 OSAddAtomic(+1, &vm_page_purgeable_count);
2239 assert(vm_page_purgeable_wired_count > 0);
2240 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2241 }
2242 }
2243 if (!mem->private && !mem->fictitious)
2244 vm_page_wire_count--;
2245 mem->wire_count = 0;
2246 assert(!mem->gobbled);
2247 } else if (mem->gobbled) {
2248 if (!mem->private && !mem->fictitious)
2249 vm_page_wire_count--;
2250 vm_page_gobble_count--;
2251 }
2252 }
2253
2254
2255 void
2256 vm_page_free_prepare_object(
2257 vm_page_t mem,
2258 boolean_t remove_from_hash)
2259 {
2260 if (mem->tabled)
2261 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2262
2263 PAGE_WAKEUP(mem); /* clears wanted */
2264
2265 if (mem->private) {
2266 mem->private = FALSE;
2267 mem->fictitious = TRUE;
2268 mem->phys_page = vm_page_fictitious_addr;
2269 }
2270 if ( !mem->fictitious) {
2271 if (mem->zero_fill == TRUE)
2272 VM_ZF_COUNT_DECR();
2273 vm_page_init(mem, mem->phys_page, mem->lopage);
2274 }
2275 }
2276
2277
2278 /*
2279 * vm_page_free:
2280 *
2281 * Returns the given page to the free list,
2282 * disassociating it from any VM object.
2283 *
2284 * Object and page queues must be locked prior to entry.
2285 */
2286 void
2287 vm_page_free(
2288 vm_page_t mem)
2289 {
2290 vm_page_free_prepare(mem);
2291
2292 if (mem->fictitious) {
2293 vm_page_release_fictitious(mem);
2294 } else {
2295 vm_page_release(mem);
2296 }
2297 }
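/*
 * Usage sketch (editor's illustration, not part of the original source):
 * vm_page_free() assumes both locks are already held; the
 * vm_page_free_unlocked() variant below takes the page queues lock
 * itself, but the object lock remains the caller's responsibility:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_free(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */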
2298
2299
2300 void
2301 vm_page_free_unlocked(
2302 vm_page_t mem,
2303 boolean_t remove_from_hash)
2304 {
2305 vm_page_lockspin_queues();
2306 vm_page_free_prepare_queues(mem);
2307 vm_page_unlock_queues();
2308
2309 vm_page_free_prepare_object(mem, remove_from_hash);
2310
2311 if (mem->fictitious) {
2312 vm_page_release_fictitious(mem);
2313 } else {
2314 vm_page_release(mem);
2315 }
2316 }
2317
2318 /*
2319 * Free a list of pages. The list can be up to several hundred pages,
2320 * as blocked up by vm_pageout_scan().
2321 * The big win is not having to take the free list lock once
2322 * per page. We sort the incoming pages into n lists, one for
2323 * each color.
2324 */
2325 void
2326 vm_page_free_list(
2327 vm_page_t mem,
2328 boolean_t prepare_object)
2329 {
2330 vm_page_t nxt;
2331 int pg_count = 0;
2332 int color;
2333 int inuse_list_head = -1;
2334
2335 queue_head_t free_list[MAX_COLORS];
2336 int inuse[MAX_COLORS];
2337
2338 for (color = 0; color < (signed) vm_colors; color++) {
2339 queue_init(&free_list[color]);
2340 }
2341
2342 while (mem) {
2343 assert(!mem->inactive);
2344 assert(!mem->active);
2345 assert(!mem->throttled);
2346 assert(!mem->free);
2347 assert(!mem->speculative);
2348 assert(!VM_PAGE_WIRED(mem));
2349 assert(mem->pageq.prev == NULL);
2350
2351 nxt = (vm_page_t)(mem->pageq.next);
2352
2353 if (prepare_object == TRUE)
2354 vm_page_free_prepare_object(mem, TRUE);
2355
2356 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2357 assert(pmap_verify_free(mem->phys_page));
2358 }
2359
2360 if (!mem->fictitious) {
2361 assert(mem->busy);
2362 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2363 vm_lopage_free_count < vm_lopage_free_limit &&
2364 mem->phys_page < max_valid_low_ppnum) {
2365 mem->pageq.next = NULL;
2366 vm_page_release(mem);
2367 } else {
2368
2369 /*
2370 * IMPORTANT: we can't set the page "free" here
2371 * because that would make the page eligible for
2372 * a physically-contiguous allocation (see
2373 * vm_page_find_contiguous()) right away (we don't
2374 * hold the vm_page_queue_free lock). That would
2375 * cause trouble because the page is not actually
2376 * in the free queue yet...
2377 */
2378 color = mem->phys_page & vm_color_mask;
2379 if (queue_empty(&free_list[color])) {
2380 inuse[color] = inuse_list_head;
2381 inuse_list_head = color;
2382 }
2383 queue_enter_first(&free_list[color],
2384 mem,
2385 vm_page_t,
2386 pageq);
2387 pg_count++;
2388
2389 pmap_clear_noencrypt(mem->phys_page);
2390 }
2391 } else {
2392 assert(mem->phys_page == vm_page_fictitious_addr ||
2393 mem->phys_page == vm_page_guard_addr);
2394 vm_page_release_fictitious(mem);
2395 }
2396 mem = nxt;
2397 }
2398 if (pg_count) {
2399 unsigned int avail_free_count;
2400 unsigned int need_wakeup = 0;
2401 unsigned int need_priv_wakeup = 0;
2402
2403 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2404
2405 color = inuse_list_head;
2406
2407 while( color != -1 ) {
2408 vm_page_t first, last;
2409 vm_page_t first_free;
2410
2411 /*
2412 * Now that we hold the vm_page_queue_free lock,
2413 * it's safe to mark all pages in our local queue
2414 * as "free"...
2415 */
2416 queue_iterate(&free_list[color],
2417 mem,
2418 vm_page_t,
2419 pageq) {
2420 assert(!mem->free);
2421 assert(mem->busy);
2422 mem->free = TRUE;
2423 }
2424
2425 /*
2426 * ... and insert our local queue at the head of
2427 * the global free queue.
2428 */
2429 first = (vm_page_t) queue_first(&free_list[color]);
2430 last = (vm_page_t) queue_last(&free_list[color]);
2431 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2432 if (queue_empty(&vm_page_queue_free[color])) {
2433 queue_last(&vm_page_queue_free[color]) =
2434 (queue_entry_t) last;
2435 } else {
2436 queue_prev(&first_free->pageq) =
2437 (queue_entry_t) last;
2438 }
2439 queue_first(&vm_page_queue_free[color]) =
2440 (queue_entry_t) first;
2441 queue_prev(&first->pageq) =
2442 (queue_entry_t) &vm_page_queue_free[color];
2443 queue_next(&last->pageq) =
2444 (queue_entry_t) first_free;
2445
2446 /* next color */
2447 color = inuse[color];
2448 }
2449
2450 vm_page_free_count += pg_count;
2451 avail_free_count = vm_page_free_count;
2452
2453 if (vm_page_free_wanted_privileged > 0 &&
2454 avail_free_count > 0) {
2455 if (avail_free_count < vm_page_free_wanted_privileged) {
2456 need_priv_wakeup = avail_free_count;
2457 vm_page_free_wanted_privileged -=
2458 avail_free_count;
2459 avail_free_count = 0;
2460 } else {
2461 need_priv_wakeup = vm_page_free_wanted_privileged;
2462 avail_free_count -=
2463 vm_page_free_wanted_privileged;
2464 vm_page_free_wanted_privileged = 0;
2465 }
2466 }
2467
2468 if (vm_page_free_wanted > 0 &&
2469 avail_free_count > vm_page_free_reserved) {
2470 unsigned int available_pages;
2471
2472 available_pages = (avail_free_count -
2473 vm_page_free_reserved);
2474
2475 if (available_pages >= vm_page_free_wanted) {
2476 need_wakeup = vm_page_free_wanted;
2477 vm_page_free_wanted = 0;
2478 } else {
2479 need_wakeup = available_pages;
2480 vm_page_free_wanted -= available_pages;
2481 }
2482 }
2483 lck_mtx_unlock(&vm_page_queue_free_lock);
2484
2485 if (need_priv_wakeup != 0) {
2486 /*
2487 * There shouldn't be that many VM-privileged threads,
2488 * so let's wake them all up, even if we don't quite
2489 * have enough pages to satisfy them all.
2490 */
2491 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2492 }
2493 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2494 /*
2495 * We don't expect to have any more waiters
2496 * after this, so let's wake them all up at
2497 * once.
2498 */
2499 thread_wakeup((event_t) &vm_page_free_count);
2500 } else for (; need_wakeup != 0; need_wakeup--) {
2501 /*
2502 * Wake up one waiter per page we just released.
2503 */
2504 thread_wakeup_one((event_t) &vm_page_free_count);
2505 }
2506
2507 VM_CHECK_MEMORYSTATUS;
2508 }
2509 }
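/*
 * Usage sketch (editor's illustration, not part of the original source):
 * a caller such as vm_pageout_scan() batches pages by chaining them
 * through pageq.next (leaving pageq.prev NULL, as the asserts above
 * require) and then frees the whole chain in one call:
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	m->pageq.next = (queue_entry_t) local_freeq;
 *	local_freeq = m;
 *	...
 *	vm_page_free_list(local_freeq, TRUE);
 */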
2510
2511
2512 /*
2513 * vm_page_wire:
2514 *
2515 * Mark this page as wired down by yet
2516 * another map, removing it from paging queues
2517 * as necessary.
2518 *
2519 * The page's object and the page queues must be locked.
2520 */
2521 void
2522 vm_page_wire(
2523 register vm_page_t mem)
2524 {
2525
2526 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2527
2528 VM_PAGE_CHECK(mem);
2529 if (mem->object) {
2530 vm_object_lock_assert_exclusive(mem->object);
2531 } else {
2532 /*
2533 * In theory, the page should be in an object before it
2534 * gets wired, since we need to hold the object lock
2535 * to update some fields in the page structure.
2536 * However, some code (i386 pmap, for example) might want
2537 * to wire a page before it gets inserted into an object.
2538 * That's somewhat OK, as long as nobody else can get to
2539 * that page and update it at the same time.
2540 */
2541 }
2542 #if DEBUG
2543 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2544 #endif
2545 if ( !VM_PAGE_WIRED(mem)) {
2546 VM_PAGE_QUEUES_REMOVE(mem);
2547
2548 if (mem->object) {
2549 mem->object->wired_page_count++;
2550 assert(mem->object->resident_page_count >=
2551 mem->object->wired_page_count);
2552 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2553 assert(vm_page_purgeable_count > 0);
2554 OSAddAtomic(-1, &vm_page_purgeable_count);
2555 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2556 }
2557 if (mem->object->all_reusable) {
2558 /*
2559 * Wired pages are not counted as "re-usable"
2560 * in "all_reusable" VM objects, so nothing
2561 * to do here.
2562 */
2563 } else if (mem->reusable) {
2564 /*
2565 * This page is not "re-usable" when it's
2566 * wired, so adjust its state and the
2567 * accounting.
2568 */
2569 vm_object_reuse_pages(mem->object,
2570 mem->offset,
2571 mem->offset+PAGE_SIZE_64,
2572 FALSE);
2573 }
2574 }
2575 assert(!mem->reusable);
2576
2577 if (!mem->private && !mem->fictitious && !mem->gobbled)
2578 vm_page_wire_count++;
2579 if (mem->gobbled)
2580 vm_page_gobble_count--;
2581 mem->gobbled = FALSE;
2582 if (mem->zero_fill == TRUE) {
2583 mem->zero_fill = FALSE;
2584 VM_ZF_COUNT_DECR();
2585 }
2586
2587 VM_CHECK_MEMORYSTATUS;
2588
2589 /*
2590 * ENCRYPTED SWAP:
2591 * The page could be encrypted, but
2592 * we don't have to decrypt it here
2593 * because we don't guarantee that the
2594 * data is actually valid at this point.
2595 * The page will get decrypted in
2596 * vm_fault_wire() if needed.
2597 */
2598 }
2599 assert(!mem->gobbled);
2600 mem->wire_count++;
2601 VM_PAGE_CHECK(mem);
2602 }
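/*
 * Usage sketch (editor's illustration, not part of the original source):
 * wiring is done with the page's object locked exclusively and the page
 * queues locked:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */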
2603
2604 /*
2605 * vm_page_gobble:
2606 *
2607 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2608 *
2609 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2610 */
2611 void
2612 vm_page_gobble(
2613 register vm_page_t mem)
2614 {
2615 vm_page_lockspin_queues();
2616 VM_PAGE_CHECK(mem);
2617
2618 assert(!mem->gobbled);
2619 assert( !VM_PAGE_WIRED(mem));
2620
2621 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2622 if (!mem->private && !mem->fictitious)
2623 vm_page_wire_count++;
2624 }
2625 vm_page_gobble_count++;
2626 mem->gobbled = TRUE;
2627 vm_page_unlock_queues();
2628 }
2629
2630 /*
2631 * vm_page_unwire:
2632 *
2633 * Release one wiring of this page, potentially
2634 * enabling it to be paged again.
2635 *
2636 * The page's object and the page queues must be locked.
2637 */
2638 void
2639 vm_page_unwire(
2640 vm_page_t mem,
2641 boolean_t queueit)
2642 {
2643
2644 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2645
2646 VM_PAGE_CHECK(mem);
2647 assert(VM_PAGE_WIRED(mem));
2648 assert(mem->object != VM_OBJECT_NULL);
2649 #if DEBUG
2650 vm_object_lock_assert_exclusive(mem->object);
2651 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2652 #endif
2653 if (--mem->wire_count == 0) {
2654 assert(!mem->private && !mem->fictitious);
2655 vm_page_wire_count--;
2656 assert(mem->object->wired_page_count > 0);
2657 mem->object->wired_page_count--;
2658 assert(mem->object->resident_page_count >=
2659 mem->object->wired_page_count);
2660 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2661 OSAddAtomic(+1, &vm_page_purgeable_count);
2662 assert(vm_page_purgeable_wired_count > 0);
2663 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2664 }
2665 assert(!mem->laundry);
2666 assert(mem->object != kernel_object);
2667 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2668
2669 if (queueit == TRUE) {
2670 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2671 vm_page_deactivate(mem);
2672 } else {
2673 vm_page_activate(mem);
2674 }
2675 }
2676
2677 VM_CHECK_MEMORYSTATUS;
2678
2679 }
2680 VM_PAGE_CHECK(mem);
2681 }
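/*
 * Usage sketch (editor's illustration, not part of the original source):
 * dropping the last wiring under the same locks makes the page pageable
 * again; queueit == TRUE asks vm_page_unwire() to re-queue it (active,
 * or deactivated for an emptied purgeable object):
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_unwire(m, TRUE);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */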
2682
2683 /*
2684 * vm_page_deactivate:
2685 *
2686 * Returns the given page to the inactive list,
2687 * indicating that no physical maps have access
2688 * to this page. [Used by the physical mapping system.]
2689 *
2690 * The page queues must be locked.
2691 */
2692 void
2693 vm_page_deactivate(
2694 vm_page_t m)
2695 {
2696 vm_page_deactivate_internal(m, TRUE);
2697 }
2698
2699
2700 void
2701 vm_page_deactivate_internal(
2702 vm_page_t m,
2703 boolean_t clear_hw_reference)
2704 {
2705
2706 VM_PAGE_CHECK(m);
2707 assert(m->object != kernel_object);
2708 assert(m->phys_page != vm_page_guard_addr);
2709
2710 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2711 #if DEBUG
2712 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2713 #endif
2714 /*
2715 * This page is no longer very interesting. If it was
2716 * interesting (active or inactive/referenced), then we
2717 * clear the reference bit and (re)enter it in the
2718 * inactive queue. Note wired pages should not have
2719 * their reference bit cleared.
2720 */
2721 assert ( !(m->absent && !m->unusual));
2722
2723 if (m->gobbled) { /* can this happen? */
2724 assert( !VM_PAGE_WIRED(m));
2725
2726 if (!m->private && !m->fictitious)
2727 vm_page_wire_count--;
2728 vm_page_gobble_count--;
2729 m->gobbled = FALSE;
2730 }
2731 if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2732 return;
2733
2734 if (!m->absent && clear_hw_reference == TRUE)
2735 pmap_clear_reference(m->phys_page);
2736
2737 m->reference = FALSE;
2738 m->no_cache = FALSE;
2739
2740 if (!m->inactive) {
2741 VM_PAGE_QUEUES_REMOVE(m);
2742
2743 assert(!m->laundry);
2744 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2745
2746 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2747 m->dirty && m->object->internal &&
2748 (m->object->purgable == VM_PURGABLE_DENY ||
2749 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2750 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2751 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2752 m->throttled = TRUE;
2753 vm_page_throttled_count++;
2754 } else {
2755 if (m->object->named && m->object->ref_count == 1) {
2756 vm_page_speculate(m, FALSE);
2757 #if DEVELOPMENT || DEBUG
2758 vm_page_speculative_recreated++;
2759 #endif
2760 } else {
2761 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2762 }
2763 }
2764 }
2765 }
2766
2767 /*
2768 * vm_page_activate:
2769 *
2770 * Put the specified page on the active list (if appropriate).
2771 *
2772 * The page queues must be locked.
2773 */
2774
2775 void
2776 vm_page_activate(
2777 register vm_page_t m)
2778 {
2779 VM_PAGE_CHECK(m);
2780 #ifdef FIXME_4778297
2781 assert(m->object != kernel_object);
2782 #endif
2783 assert(m->phys_page != vm_page_guard_addr);
2784 #if DEBUG
2785 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2786 #endif
2787 assert( !(m->absent && !m->unusual));
2788
2789 if (m->gobbled) {
2790 assert( !VM_PAGE_WIRED(m));
2791 if (!m->private && !m->fictitious)
2792 vm_page_wire_count--;
2793 vm_page_gobble_count--;
2794 m->gobbled = FALSE;
2795 }
2796 if (m->private || m->fictitious)
2797 return;
2798
2799 #if DEBUG
2800 if (m->active)
2801 panic("vm_page_activate: already active");
2802 #endif
2803
2804 if (m->speculative) {
2805 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2806 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2807 }
2808
2809 VM_PAGE_QUEUES_REMOVE(m);
2810
2811 if ( !VM_PAGE_WIRED(m)) {
2812 assert(!m->laundry);
2813 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2814 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2815 m->dirty && m->object->internal &&
2816 (m->object->purgable == VM_PURGABLE_DENY ||
2817 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2818 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2819 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2820 m->throttled = TRUE;
2821 vm_page_throttled_count++;
2822 } else {
2823 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2824 m->active = TRUE;
2825 vm_page_active_count++;
2826 }
2827 m->reference = TRUE;
2828 m->no_cache = FALSE;
2829 }
2830 VM_PAGE_CHECK(m);
2831 }
2832
2833
2834 /*
2835 * vm_page_speculate:
2836 *
2837 * Put the specified page on the speculative list (if appropriate).
2838 *
2839 * The page queues must be locked.
2840 */
2841 void
2842 vm_page_speculate(
2843 vm_page_t m,
2844 boolean_t new)
2845 {
2846 struct vm_speculative_age_q *aq;
2847
2848 VM_PAGE_CHECK(m);
2849 assert(m->object != kernel_object);
2850 assert(m->phys_page != vm_page_guard_addr);
2851 #if DEBUG
2852 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2853 #endif
2854 assert( !(m->absent && !m->unusual));
2855
2856 if (m->private || m->fictitious)
2857 return;
2858
2859 VM_PAGE_QUEUES_REMOVE(m);
2860
2861 if ( !VM_PAGE_WIRED(m)) {
2862 mach_timespec_t ts;
2863 clock_sec_t sec;
2864 clock_nsec_t nsec;
2865
2866 clock_get_system_nanotime(&sec, &nsec);
2867 ts.tv_sec = (unsigned int) sec;
2868 ts.tv_nsec = nsec;
2869
2870 if (vm_page_speculative_count == 0) {
2871
2872 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2873 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2874
2875 aq = &vm_page_queue_speculative[speculative_age_index];
2876
2877 /*
2878 * set the timer to begin a new group
2879 */
2880 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2881 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2882
2883 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2884 } else {
2885 aq = &vm_page_queue_speculative[speculative_age_index];
2886
2887 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2888
2889 speculative_age_index++;
2890
2891 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2892 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2893 if (speculative_age_index == speculative_steal_index) {
2894 speculative_steal_index = speculative_age_index + 1;
2895
2896 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2897 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2898 }
2899 aq = &vm_page_queue_speculative[speculative_age_index];
2900
2901 if (!queue_empty(&aq->age_q))
2902 vm_page_speculate_ageit(aq);
2903
2904 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2905 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2906
2907 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2908 }
2909 }
2910 enqueue_tail(&aq->age_q, &m->pageq);
2911 m->speculative = TRUE;
2912 vm_page_speculative_count++;
2913
2914 if (new == TRUE) {
2915 vm_object_lock_assert_exclusive(m->object);
2916
2917 m->object->pages_created++;
2918 #if DEVELOPMENT || DEBUG
2919 vm_page_speculative_created++;
2920 #endif
2921 }
2922 }
2923 VM_PAGE_CHECK(m);
2924 }
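/*
 * Worked example (editor's note, not part of the original source): with
 * vm_page_speculative_q_age_ms set to, say, 500, the aging-bin deadline
 * computed above works out to
 *
 *	age_ts.tv_sec  = 500 / 1000                          = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500000000
 *
 * and ADD_MACH_TIMESPEC() adds that to the current time, so this bin
 * stops accepting new speculative pages half a second from now.
 */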
2925
2926
2927 /*
2928 * move pages from the specified aging bin to
2929 * the speculative bin that pageout_scan claims from
2930 *
2931 * The page queues must be locked.
2932 */
2933 void
2934 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2935 {
2936 struct vm_speculative_age_q *sq;
2937 vm_page_t t;
2938
2939 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2940
2941 if (queue_empty(&sq->age_q)) {
2942 sq->age_q.next = aq->age_q.next;
2943 sq->age_q.prev = aq->age_q.prev;
2944
2945 t = (vm_page_t)sq->age_q.next;
2946 t->pageq.prev = &sq->age_q;
2947
2948 t = (vm_page_t)sq->age_q.prev;
2949 t->pageq.next = &sq->age_q;
2950 } else {
2951 t = (vm_page_t)sq->age_q.prev;
2952 t->pageq.next = aq->age_q.next;
2953
2954 t = (vm_page_t)aq->age_q.next;
2955 t->pageq.prev = sq->age_q.prev;
2956
2957 t = (vm_page_t)aq->age_q.prev;
2958 t->pageq.next = &sq->age_q;
2959
2960 sq->age_q.prev = aq->age_q.prev;
2961 }
2962 queue_init(&aq->age_q);
2963 }
2964
2965
2966 void
2967 vm_page_lru(
2968 vm_page_t m)
2969 {
2970 VM_PAGE_CHECK(m);
2971 assert(m->object != kernel_object);
2972 assert(m->phys_page != vm_page_guard_addr);
2973
2974 #if DEBUG
2975 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2976 #endif
2977 if (m->active || m->reference)
2978 return;
2979
2980 if (m->private || (VM_PAGE_WIRED(m)))
2981 return;
2982
2983 m->no_cache = FALSE;
2984
2985 VM_PAGE_QUEUES_REMOVE(m);
2986
2987 assert(!m->laundry);
2988 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2989
2990 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2991 }
2992
2993
2994 void
2995 vm_page_reactivate_all_throttled(void)
2996 {
2997 vm_page_t first_throttled, last_throttled;
2998 vm_page_t first_active;
2999 vm_page_t m;
3000 int extra_active_count;
3001
3002 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3003 return;
3004
3005 extra_active_count = 0;
3006 vm_page_lock_queues();
3007 if (! queue_empty(&vm_page_queue_throttled)) {
3008 /*
3009 * Switch "throttled" pages to "active".
3010 */
3011 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3012 VM_PAGE_CHECK(m);
3013 assert(m->throttled);
3014 assert(!m->active);
3015 assert(!m->inactive);
3016 assert(!m->speculative);
3017 assert(!VM_PAGE_WIRED(m));
3018
3019 extra_active_count++;
3020
3021 m->throttled = FALSE;
3022 m->active = TRUE;
3023 VM_PAGE_CHECK(m);
3024 }
3025
3026 /*
3027 * Transfer the entire throttled queue to the regular LRU page queues.
3028 * We insert it at the head of the active queue, so that these pages
3029 * get re-evaluated by the LRU algorithm first, since they've been
3030 * completely out of it until now.
3031 */
3032 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3033 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3034 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3035 if (queue_empty(&vm_page_queue_active)) {
3036 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3037 } else {
3038 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3039 }
3040 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3041 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3042 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3043
3044 #if DEBUG
3045 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3046 #endif
3047 queue_init(&vm_page_queue_throttled);
3048 /*
3049 * Adjust the global page counts.
3050 */
3051 vm_page_active_count += extra_active_count;
3052 vm_page_throttled_count = 0;
3053 }
3054 assert(vm_page_throttled_count == 0);
3055 assert(queue_empty(&vm_page_queue_throttled));
3056 vm_page_unlock_queues();
3057 }
3058
3059
3060 /*
3061 * move pages from the indicated local queue to the global active queue
3062 * it's OK to fail if we're below the hard limit and force == FALSE
3063 * the nolocks == TRUE case is to allow this function to be run on
3064 * the hibernate path
3065 */
3066
3067 void
3068 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3069 {
3070 struct vpl *lq;
3071 vm_page_t first_local, last_local;
3072 vm_page_t first_active;
3073 vm_page_t m;
3074 uint32_t count = 0;
3075
3076 if (vm_page_local_q == NULL)
3077 return;
3078
3079 lq = &vm_page_local_q[lid].vpl_un.vpl;
3080
3081 if (nolocks == FALSE) {
3082 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3083 if ( !vm_page_trylockspin_queues())
3084 return;
3085 } else
3086 vm_page_lockspin_queues();
3087
3088 VPL_LOCK(&lq->vpl_lock);
3089 }
3090 if (lq->vpl_count) {
3091 /*
3092 * Switch "local" pages to "active".
3093 */
3094 assert(!queue_empty(&lq->vpl_queue));
3095
3096 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3097 VM_PAGE_CHECK(m);
3098 assert(m->local);
3099 assert(!m->active);
3100 assert(!m->inactive);
3101 assert(!m->speculative);
3102 assert(!VM_PAGE_WIRED(m));
3103 assert(!m->throttled);
3104 assert(!m->fictitious);
3105
3106 if (m->local_id != lid)
3107 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3108
3109 m->local_id = 0;
3110 m->local = FALSE;
3111 m->active = TRUE;
3112 VM_PAGE_CHECK(m);
3113
3114 count++;
3115 }
3116 if (count != lq->vpl_count)
3117 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3118
3119 /*
3120 * Transfer the entire local queue to the regular LRU page queues.
3121 */
3122 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3123 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3124 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3125
3126 if (queue_empty(&vm_page_queue_active)) {
3127 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3128 } else {
3129 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3130 }
3131 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3132 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3133 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3134
3135 queue_init(&lq->vpl_queue);
3136 /*
3137 * Adjust the global page counts.
3138 */
3139 vm_page_active_count += lq->vpl_count;
3140 lq->vpl_count = 0;
3141 }
3142 assert(queue_empty(&lq->vpl_queue));
3143
3144 if (nolocks == FALSE) {
3145 VPL_UNLOCK(&lq->vpl_lock);
3146 vm_page_unlock_queues();
3147 }
3148 }
3149
3150 /*
3151 * vm_page_part_zero_fill:
3152 *
3153 * Zero-fill a part of the page.
3154 */
3155 void
3156 vm_page_part_zero_fill(
3157 vm_page_t m,
3158 vm_offset_t m_pa,
3159 vm_size_t len)
3160 {
3161 vm_page_t tmp;
3162
3163 VM_PAGE_CHECK(m);
3164 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3165 pmap_zero_part_page(m->phys_page, m_pa, len);
3166 #else
3167 while (1) {
3168 tmp = vm_page_grab();
3169 if (tmp == VM_PAGE_NULL) {
3170 vm_page_wait(THREAD_UNINT);
3171 continue;
3172 }
3173 break;
3174 }
3175 vm_page_zero_fill(tmp);
3176 if(m_pa != 0) {
3177 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3178 }
3179 if((m_pa + len) < PAGE_SIZE) {
3180 vm_page_part_copy(m, m_pa + len, tmp,
3181 m_pa + len, PAGE_SIZE - (m_pa + len));
3182 }
3183 vm_page_copy(tmp,m);
3184 VM_PAGE_FREE(tmp);
3185 #endif
3186
3187 }
3188
3189 /*
3190 * vm_page_zero_fill:
3191 *
3192 * Zero-fill the specified page.
3193 */
3194 void
3195 vm_page_zero_fill(
3196 vm_page_t m)
3197 {
3198 XPR(XPR_VM_PAGE,
3199 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3200 m->object, m->offset, m, 0,0);
3201
3202 VM_PAGE_CHECK(m);
3203
3204 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3205 pmap_zero_page(m->phys_page);
3206 }
3207
3208 /*
3209 * vm_page_part_copy:
3210 *
3211 * copy part of one page to another
3212 */
3213
3214 void
3215 vm_page_part_copy(
3216 vm_page_t src_m,
3217 vm_offset_t src_pa,
3218 vm_page_t dst_m,
3219 vm_offset_t dst_pa,
3220 vm_size_t len)
3221 {
3222 VM_PAGE_CHECK(src_m);
3223 VM_PAGE_CHECK(dst_m);
3224
3225 pmap_copy_part_page(src_m->phys_page, src_pa,
3226 dst_m->phys_page, dst_pa, len);
3227 }
3228
3229 /*
3230 * vm_page_copy:
3231 *
3232 * Copy one page to another
3233 *
3234 * ENCRYPTED SWAP:
3235 * The source page should not be encrypted. The caller should
3236 * make sure the page is decrypted first, if necessary.
3237 */
3238
3239 int vm_page_copy_cs_validations = 0;
3240 int vm_page_copy_cs_tainted = 0;
3241
3242 void
3243 vm_page_copy(
3244 vm_page_t src_m,
3245 vm_page_t dest_m)
3246 {
3247 XPR(XPR_VM_PAGE,
3248 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3249 src_m->object, src_m->offset,
3250 dest_m->object, dest_m->offset,
3251 0);
3252
3253 VM_PAGE_CHECK(src_m);
3254 VM_PAGE_CHECK(dest_m);
3255
3256 /*
3257 * ENCRYPTED SWAP:
3258 * The source page should not be encrypted at this point.
3259 * The destination page will therefore not contain encrypted
3260 * data after the copy.
3261 */
3262 if (src_m->encrypted) {
3263 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3264 }
3265 dest_m->encrypted = FALSE;
3266
3267 if (src_m->object != VM_OBJECT_NULL &&
3268 src_m->object->code_signed) {
3269 /*
3270 * We're copying a page from a code-signed object.
3271 * Whoever ends up mapping the copy page might care about
3272 * the original page's integrity, so let's validate the
3273 * source page now.
3274 */
3275 vm_page_copy_cs_validations++;
3276 vm_page_validate_cs(src_m);
3277 }
3278
3279 if (vm_page_is_slideable(src_m)) {
3280 boolean_t was_busy = src_m->busy;
3281 src_m->busy = TRUE;
3282 (void) vm_page_slide(src_m, 0);
3283 assert(src_m->busy);
3284 if(!was_busy) {
3285 PAGE_WAKEUP_DONE(src_m);
3286 }
3287 }
3288
3289 /*
3290 * Propagate the cs_tainted bit to the copy page. Do not propagate
3291 * the cs_validated bit.
3292 */
3293 dest_m->cs_tainted = src_m->cs_tainted;
3294 if (dest_m->cs_tainted) {
3295 vm_page_copy_cs_tainted++;
3296 }
3297 dest_m->slid = src_m->slid;
3298 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3299 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3300 }
3301
3302 #if MACH_ASSERT
3303 static void
3304 _vm_page_print(
3305 vm_page_t p)
3306 {
3307 printf("vm_page %p: \n", p);
3308 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3309 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3310 printf(" next=%p\n", p->next);
3311 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3312 printf(" wire_count=%u\n", p->wire_count);
3313
3314 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3315 (p->local ? "" : "!"),
3316 (p->inactive ? "" : "!"),
3317 (p->active ? "" : "!"),
3318 (p->pageout_queue ? "" : "!"),
3319 (p->speculative ? "" : "!"),
3320 (p->laundry ? "" : "!"));
3321 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3322 (p->free ? "" : "!"),
3323 (p->reference ? "" : "!"),
3324 (p->gobbled ? "" : "!"),
3325 (p->private ? "" : "!"),
3326 (p->throttled ? "" : "!"));
3327 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3328 (p->busy ? "" : "!"),
3329 (p->wanted ? "" : "!"),
3330 (p->tabled ? "" : "!"),
3331 (p->fictitious ? "" : "!"),
3332 (p->pmapped ? "" : "!"),
3333 (p->wpmapped ? "" : "!"));
3334 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3335 (p->pageout ? "" : "!"),
3336 (p->absent ? "" : "!"),
3337 (p->error ? "" : "!"),
3338 (p->dirty ? "" : "!"),
3339 (p->cleaning ? "" : "!"),
3340 (p->precious ? "" : "!"),
3341 (p->clustered ? "" : "!"));
3342 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3343 (p->overwriting ? "" : "!"),
3344 (p->restart ? "" : "!"),
3345 (p->unusual ? "" : "!"),
3346 (p->encrypted ? "" : "!"),
3347 (p->encrypted_cleaning ? "" : "!"));
3348 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3349 (p->list_req_pending ? "" : "!"),
3350 (p->dump_cleaning ? "" : "!"),
3351 (p->cs_validated ? "" : "!"),
3352 (p->cs_tainted ? "" : "!"),
3353 (p->no_cache ? "" : "!"));
3354 printf(" %szero_fill\n",
3355 (p->zero_fill ? "" : "!"));
3356
3357 printf("phys_page=0x%x\n", p->phys_page);
3358 }
3359
3360 /*
3361 * Check that the list of pages is ordered by
3362 * ascending physical address and has no holes.
3363 */
3364 static int
3365 vm_page_verify_contiguous(
3366 vm_page_t pages,
3367 unsigned int npages)
3368 {
3369 register vm_page_t m;
3370 unsigned int page_count;
3371 vm_offset_t prev_addr;
3372
3373 prev_addr = pages->phys_page;
3374 page_count = 1;
3375 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3376 if (m->phys_page != prev_addr + 1) {
3377 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3378 m, (long)prev_addr, m->phys_page);
3379 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3380 panic("vm_page_verify_contiguous: not contiguous!");
3381 }
3382 prev_addr = m->phys_page;
3383 ++page_count;
3384 }
3385 if (page_count != npages) {
3386 printf("pages %p actual count 0x%x but requested 0x%x\n",
3387 pages, page_count, npages);
3388 panic("vm_page_verify_contiguous: count error");
3389 }
3390 return 1;
3391 }
3392
3393
3394 /*
3395 * Check the free lists for proper length etc.
3396 */
3397 static unsigned int
3398 vm_page_verify_free_list(
3399 queue_head_t *vm_page_queue,
3400 unsigned int color,
3401 vm_page_t look_for_page,
3402 boolean_t expect_page)
3403 {
3404 unsigned int npages;
3405 vm_page_t m;
3406 vm_page_t prev_m;
3407 boolean_t found_page;
3408
3409 found_page = FALSE;
3410 npages = 0;
3411 prev_m = (vm_page_t) vm_page_queue;
3412 queue_iterate(vm_page_queue,
3413 m,
3414 vm_page_t,
3415 pageq) {
3416
3417 if (m == look_for_page) {
3418 found_page = TRUE;
3419 }
3420 if ((vm_page_t) m->pageq.prev != prev_m)
3421 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3422 color, npages, m, m->pageq.prev, prev_m);
3423 if ( ! m->busy )
3424 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3425 color, npages, m);
3426 if (color != (unsigned int) -1) {
3427 if ((m->phys_page & vm_color_mask) != color)
3428 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3429 color, npages, m, m->phys_page & vm_color_mask, color);
3430 if ( ! m->free )
3431 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3432 color, npages, m);
3433 }
3434 ++npages;
3435 prev_m = m;
3436 }
3437 if (look_for_page != VM_PAGE_NULL) {
3438 unsigned int other_color;
3439
3440 if (expect_page && !found_page) {
3441 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3442 color, npages, look_for_page, look_for_page->phys_page);
3443 _vm_page_print(look_for_page);
3444 for (other_color = 0;
3445 other_color < vm_colors;
3446 other_color++) {
3447 if (other_color == color)
3448 continue;
3449 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3450 other_color, look_for_page, FALSE);
3451 }
3452 if (color == (unsigned int) -1) {
3453 vm_page_verify_free_list(&vm_lopage_queue_free,
3454 (unsigned int) -1, look_for_page, FALSE);
3455 }
3456 panic("vm_page_verify_free_list(color=%u)\n", color);
3457 }
3458 if (!expect_page && found_page) {
3459 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3460 color, npages, look_for_page, look_for_page->phys_page);
3461 }
3462 }
3463 return npages;
3464 }
3465
3466 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3467 static void
3468 vm_page_verify_free_lists( void )
3469 {
3470 unsigned int color, npages, nlopages;
3471
3472 if (! vm_page_verify_free_lists_enabled)
3473 return;
3474
3475 npages = 0;
3476
3477 lck_mtx_lock(&vm_page_queue_free_lock);
3478
3479 for( color = 0; color < vm_colors; color++ ) {
3480 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3481 color, VM_PAGE_NULL, FALSE);
3482 }
3483 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3484 (unsigned int) -1,
3485 VM_PAGE_NULL, FALSE);
3486 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3487 panic("vm_page_verify_free_lists: "
3488 "npages %u free_count %d nlopages %u lo_free_count %u",
3489 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3490
3491 lck_mtx_unlock(&vm_page_queue_free_lock);
3492 }
3493
3494 void
3495 vm_page_queues_assert(
3496 vm_page_t mem,
3497 int val)
3498 {
3499 if (mem->free + mem->active + mem->inactive + mem->speculative +
3500 mem->throttled + mem->pageout_queue > (val)) {
3501 _vm_page_print(mem);
3502 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3503 }
3504 if (VM_PAGE_WIRED(mem)) {
3505 assert(!mem->active);
3506 assert(!mem->inactive);
3507 assert(!mem->speculative);
3508 assert(!mem->throttled);
3509 }
3510 }
3511 #endif /* MACH_ASSERT */
3512
3513
3514 /*
3515 * CONTIGUOUS PAGE ALLOCATION
3516 *
3517 * Find a region large enough to contain at least n pages
3518 * of contiguous physical memory.
3519 *
3520 * This is done by traversing the vm_page_t array in a linear fashion
3521 * we assume that the vm_page_t array has the available physical pages in an
3522 * ordered, ascending list... this is currently true of all our implementations
3523 * and must remain so... there can be 'holes' in the array... we also can
3524 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3525 * which used to happen via 'vm_page_convert'... that function was no longer
3526 * being called and was removed...
3527 *
3528 * The basic flow consists of stabilizing some of the interesting state of
3529 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3530 * sweep at the beginning of the array looking for pages that meet our criteria
3531 * for a 'stealable' page... currently we are pretty conservative... if the page
3532 * meets these criteria and is physically contiguous to the previous page in the 'run'
3533 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3534 * and start to develop a new run... if at this point we've already considered
3535 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3536 * and mutex_pause (which will yield the processor), to keep the latency low with respect
3537 * to other threads trying to acquire free pages (or move pages from q to q),
3538 * and then continue from the spot we left off... we only make 1 pass through the
3539 * array. Once we have a 'run' that is long enough, we'll go into the loop
3540 * which steals the pages from the queues they're currently on... pages on the free
3541 * queue can be stolen directly... pages that are on any of the other queues
3542 * must be removed from the object they are tabled on... this requires taking the
3543 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3544 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3545 * dump the pages we've currently stolen back to the free list, and pick up our
3546 * scan from the point where we aborted the 'current' run.
3547 *
3548 *
3549 * Requirements:
3550 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3551 *
3552 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3553 *
3554 * Algorithm:
3555 */
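/*
 * Parameter note (editor's sketch, not part of the original source):
 * pnum_mask expresses the required alignment of the run's starting
 * physical page; a start candidate is rejected unless
 * (phys_page & pnum_mask) == 0.  For example, assuming 4KB pages, a
 * caller that needs the run to start on a 64KB physical boundary would
 * pass pnum_mask == 0xF (16 pages per 64KB).
 */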
3556
3557 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3558
3559
3560 #define RESET_STATE_OF_RUN() \
3561 MACRO_BEGIN \
3562 prevcontaddr = -2; \
3563 start_pnum = -1; \
3564 free_considered = 0; \
3565 substitute_needed = 0; \
3566 npages = 0; \
3567 MACRO_END
3568
3569 /*
3570 * Can we steal in-use (i.e. not free) pages when searching for
3571 * physically-contiguous pages ?
3572 */
3573 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3574
3575 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3576 #if DEBUG
3577 int vm_page_find_contig_debug = 0;
3578 #endif
3579
3580 static vm_page_t
3581 vm_page_find_contiguous(
3582 unsigned int contig_pages,
3583 ppnum_t max_pnum,
3584 ppnum_t pnum_mask,
3585 boolean_t wire,
3586 int flags)
3587 {
3588 vm_page_t m = NULL;
3589 ppnum_t prevcontaddr;
3590 ppnum_t start_pnum;
3591 unsigned int npages, considered, scanned;
3592 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3593 unsigned int idx_last_contig_page_found = 0;
3594 int free_considered, free_available;
3595 int substitute_needed;
3596 boolean_t wrapped;
3597 #if DEBUG
3598 clock_sec_t tv_start_sec, tv_end_sec;
3599 clock_usec_t tv_start_usec, tv_end_usec;
3600 #endif
3601 #if MACH_ASSERT
3602 int yielded = 0;
3603 int dumped_run = 0;
3604 int stolen_pages = 0;
3605 #endif
3606
3607 if (contig_pages == 0)
3608 return VM_PAGE_NULL;
3609
3610 #if MACH_ASSERT
3611 vm_page_verify_free_lists();
3612 #endif
3613 #if DEBUG
3614 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3615 #endif
3616 vm_page_lock_queues();
3617 lck_mtx_lock(&vm_page_queue_free_lock);
3618
3619 RESET_STATE_OF_RUN();
3620
3621 scanned = 0;
3622 considered = 0;
3623 free_available = vm_page_free_count - vm_page_free_reserved;
3624
3625 wrapped = FALSE;
3626
3627 if(flags & KMA_LOMEM)
3628 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3629 else
3630 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3631
3632 orig_last_idx = idx_last_contig_page_found;
3633 last_idx = orig_last_idx;
3634
3635 for (page_idx = last_idx, start_idx = last_idx;
3636 npages < contig_pages && page_idx < vm_pages_count;
3637 page_idx++) {
3638 retry:
3639 if (wrapped &&
3640 npages == 0 &&
3641 page_idx >= orig_last_idx) {
3642 /*
3643 * We're back where we started and we haven't
3644 * found any suitable contiguous range. Let's
3645 * give up.
3646 */
3647 break;
3648 }
3649 scanned++;
3650 m = &vm_pages[page_idx];
3651
3652 assert(!m->fictitious);
3653 assert(!m->private);
3654
3655 if (max_pnum && m->phys_page > max_pnum) {
3656 /* no more low pages... */
3657 break;
3658 }
3659 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3660 /*
3661 * not aligned
3662 */
3663 RESET_STATE_OF_RUN();
3664
3665 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3666 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3667 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3668 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3669 m->pageout) {
3670 /*
3671 * page is in a transient state
3672 * or a state we don't want to deal
3673 * with, so don't consider it which
3674 * means starting a new run
3675 */
3676 RESET_STATE_OF_RUN();
3677
3678 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3679 /*
3680 * page needs to be on one of our queues
3681 * in order for it to be stable behind the
3682 * locks we hold at this point...
3683 * if not, don't consider it which
3684 * means starting a new run
3685 */
3686 RESET_STATE_OF_RUN();
3687
3688 } else if (!m->free && (!m->tabled || m->busy)) {
3689 /*
3690 * pages on the free list are always 'busy'
3691 * so we couldn't test for 'busy' in the check
3692 * for the transient states... pages that are
3693 * 'free' are never 'tabled', so we also couldn't
3694 * test for 'tabled'. So we check here to make
3695 * sure that a non-free page is not busy and is
3696 * tabled on an object...
3697 * if not, don't consider it which
3698 * means starting a new run
3699 */
3700 RESET_STATE_OF_RUN();
3701
3702 } else {
3703 if (m->phys_page != prevcontaddr + 1) {
3704 if ((m->phys_page & pnum_mask) != 0) {
3705 RESET_STATE_OF_RUN();
3706 goto did_consider;
3707 } else {
3708 npages = 1;
3709 start_idx = page_idx;
3710 start_pnum = m->phys_page;
3711 }
3712 } else {
3713 npages++;
3714 }
3715 prevcontaddr = m->phys_page;
3716
3717 VM_PAGE_CHECK(m);
3718 if (m->free) {
3719 free_considered++;
3720 } else {
3721 /*
3722 * This page is not free.
3723 * If we can't steal used pages,
3724 * we have to give up this run
3725 * and keep looking.
3726 * Otherwise, we might need to
3727 * move the contents of this page
3728 * into a substitute page.
3729 */
3730 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3731 if (m->pmapped || m->dirty) {
3732 substitute_needed++;
3733 }
3734 #else
3735 RESET_STATE_OF_RUN();
3736 #endif
3737 }
3738
3739 if ((free_considered + substitute_needed) > free_available) {
3740 /*
3741 * if we let this run continue
3742 * we will end up dropping the vm_page_free_count
3743 * below the reserve limit... we need to abort
3744 * this run, but we can at least re-consider this
3745 * page... thus the jump back to 'retry'
3746 */
3747 RESET_STATE_OF_RUN();
3748
3749 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3750 considered++;
3751 goto retry;
3752 }
3753 /*
3754 * free_available == 0
3755 * so can't consider any free pages... if
3756 * we went to retry in this case, we'd
3757 * get stuck looking at the same page
3758 * w/o making any forward progress...
3759 * we also want to take this path if we've already
3760 * reached our limit that controls the lock latency
3761 */
3762 }
3763 }
3764 did_consider:
3765 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3766
3767 lck_mtx_unlock(&vm_page_queue_free_lock);
3768 vm_page_unlock_queues();
3769
3770 mutex_pause(0);
3771
3772 vm_page_lock_queues();
3773 lck_mtx_lock(&vm_page_queue_free_lock);
3774
3775 RESET_STATE_OF_RUN();
3776 /*
3777 * reset our free page limit since we
3778 * dropped the lock protecting the vm_page_free_queue
3779 */
3780 free_available = vm_page_free_count - vm_page_free_reserved;
3781 considered = 0;
3782 #if MACH_ASSERT
3783 yielded++;
3784 #endif
3785 goto retry;
3786 }
3787 considered++;
3788 }
3789 m = VM_PAGE_NULL;
3790
3791 if (npages != contig_pages) {
3792 if (!wrapped) {
3793 /*
3794 * We didn't find a contiguous range but we didn't
3795 * start from the very first page.
3796 * Start again from the very first page.
3797 */
3798 RESET_STATE_OF_RUN();
3799 if( flags & KMA_LOMEM)
3800 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3801 else
3802 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3803 last_idx = 0;
3804 page_idx = last_idx;
3805 wrapped = TRUE;
3806 goto retry;
3807 }
3808 lck_mtx_unlock(&vm_page_queue_free_lock);
3809 } else {
3810 vm_page_t m1;
3811 vm_page_t m2;
3812 unsigned int cur_idx;
3813 unsigned int tmp_start_idx;
3814 vm_object_t locked_object = VM_OBJECT_NULL;
3815 boolean_t abort_run = FALSE;
3816
3817 assert(page_idx - start_idx == contig_pages);
3818
3819 tmp_start_idx = start_idx;
3820
3821 /*
3822 * first pass through to pull the free pages
3823 * off of the free queue so that in case we
3824 * need substitute pages, we won't grab any
3825 * of the free pages in the run... we clear
3826 * the 'free' bit as we go, and even in
3827 * an abort_run case, we'll collect all of the
3828 * free pages in this run and return them to the free list
3829 */
3830 while (start_idx < page_idx) {
3831
3832 m1 = &vm_pages[start_idx++];
3833
3834 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3835 assert(m1->free);
3836 #endif
3837
3838 if (m1->free) {
3839 unsigned int color;
3840
3841 color = m1->phys_page & vm_color_mask;
3842 #if MACH_ASSERT
3843 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3844 #endif
3845 queue_remove(&vm_page_queue_free[color],
3846 m1,
3847 vm_page_t,
3848 pageq);
3849 m1->pageq.next = NULL;
3850 m1->pageq.prev = NULL;
3851 #if MACH_ASSERT
3852 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3853 #endif
3854 /*
3855 * Clear the "free" bit so that this page
3856 * does not get considered for another
3857 * concurrent physically-contiguous allocation.
3858 */
3859 m1->free = FALSE;
3860 assert(m1->busy);
3861
3862 vm_page_free_count--;
3863 }
3864 }
3865 /*
3866 * adjust global freelist counts
3867 */
3868 if (vm_page_free_count < vm_page_free_count_minimum)
3869 vm_page_free_count_minimum = vm_page_free_count;
3870
3871 if( flags & KMA_LOMEM)
3872 vm_page_lomem_find_contiguous_last_idx = page_idx;
3873 else
3874 vm_page_find_contiguous_last_idx = page_idx;
3875
3876 /*
3877 * we can drop the free queue lock at this point since
3878 * we've pulled any 'free' candidates off of the list
3879 * we need it dropped so that we can do a vm_page_grab
3880 * when substituting for pmapped/dirty pages
3881 */
3882 lck_mtx_unlock(&vm_page_queue_free_lock);
3883
3884 start_idx = tmp_start_idx;
3885 cur_idx = page_idx - 1;
3886
3887 while (start_idx++ < page_idx) {
3888 /*
3889 * must go through the list from back to front
3890 * so that the page list is created in the
3891 * correct order - low -> high phys addresses
3892 */
3893 m1 = &vm_pages[cur_idx--];
3894
3895 assert(!m1->free);
3896 if (m1->object == VM_OBJECT_NULL) {
3897 /*
3898 * page has already been removed from
3899 * the free list in the 1st pass
3900 */
3901 assert(m1->offset == (vm_object_offset_t) -1);
3902 assert(m1->busy);
3903 assert(!m1->wanted);
3904 assert(!m1->laundry);
3905 } else {
3906 vm_object_t object;
3907
3908 if (abort_run == TRUE)
3909 continue;
3910
3911 object = m1->object;
3912
3913 if (object != locked_object) {
3914 if (locked_object) {
3915 vm_object_unlock(locked_object);
3916 locked_object = VM_OBJECT_NULL;
3917 }
3918 if (vm_object_lock_try(object))
3919 locked_object = object;
3920 }
3921 if (locked_object == VM_OBJECT_NULL ||
3922 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3923 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3924 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3925 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3926
3927 if (locked_object) {
3928 vm_object_unlock(locked_object);
3929 locked_object = VM_OBJECT_NULL;
3930 }
3931 tmp_start_idx = cur_idx;
3932 abort_run = TRUE;
3933 continue;
3934 }
3935 if (m1->pmapped || m1->dirty) {
3936 int refmod;
3937 vm_object_offset_t offset;
3938
3939 m2 = vm_page_grab();
3940
3941 if (m2 == VM_PAGE_NULL) {
3942 if (locked_object) {
3943 vm_object_unlock(locked_object);
3944 locked_object = VM_OBJECT_NULL;
3945 }
3946 tmp_start_idx = cur_idx;
3947 abort_run = TRUE;
3948 continue;
3949 }
3950 if (m1->pmapped)
3951 refmod = pmap_disconnect(m1->phys_page);
3952 else
3953 refmod = 0;
3954 vm_page_copy(m1, m2);
3955
3956 m2->reference = m1->reference;
3957 m2->dirty = m1->dirty;
3958
3959 if (refmod & VM_MEM_REFERENCED)
3960 m2->reference = TRUE;
3961 if (refmod & VM_MEM_MODIFIED)
3962 m2->dirty = TRUE;
3963 offset = m1->offset;
3964
3965 /*
3966 * completely cleans up the state
3967 * of the page so that it is ready
3968 * to be put onto the free list, or
3969 * for this purpose it looks like it
3970 * just came off of the free list
3971 */
3972 vm_page_free_prepare(m1);
3973
3974 /*
3975 * make sure we clear the ref/mod state
3976 * from the pmap layer... else we risk
3977 * inheriting state from the last time
3978 * this page was used...
3979 */
3980 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3981 /*
3982 * now put the substitute page on the object
3983 */
3984 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
3985
3986 if (m2->reference)
3987 vm_page_activate(m2);
3988 else
3989 vm_page_deactivate(m2);
3990
3991 PAGE_WAKEUP_DONE(m2);
3992
3993 } else {
3994 /*
3995 * completely cleans up the state
3996 * of the page so that it is ready
3997 * to be put onto the free list, or
3998 * for this purpose it looks like it
3999 * just came off of the free list
4000 */
4001 vm_page_free_prepare(m1);
4002 }
4003 #if MACH_ASSERT
4004 stolen_pages++;
4005 #endif
4006 }
4007 m1->pageq.next = (queue_entry_t) m;
4008 m1->pageq.prev = NULL;
4009 m = m1;
4010 }
4011 if (locked_object) {
4012 vm_object_unlock(locked_object);
4013 locked_object = VM_OBJECT_NULL;
4014 }
4015
4016 if (abort_run == TRUE) {
4017 if (m != VM_PAGE_NULL) {
4018 vm_page_free_list(m, FALSE);
4019 }
4020 #if MACH_ASSERT
4021 dumped_run++;
4022 #endif
4023 /*
4024 * want the index of the last
4025 * page in this run that was
4026 * successfully 'stolen', so back
4027 * it up 1 for the auto-decrement on use
4028 * and 1 more to bump back over this page
4029 */
4030 page_idx = tmp_start_idx + 2;
4031 if (page_idx >= vm_pages_count) {
4032 if (wrapped)
4033 goto done_scanning;
4034 page_idx = last_idx = 0;
4035 wrapped = TRUE;
4036 }
4037 abort_run = FALSE;
4038
4039 /*
4040 * this run didn't pan out... reset our state and
4041 * retry the scan from just beyond the page that
4042 * caused the abort (or from page 0 if we had to wrap)
4043 */
4044 RESET_STATE_OF_RUN();
4045
4046 if( flags & KMA_LOMEM)
4047 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4048 else
4049 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4050
4051 last_idx = page_idx;
4052
4053 lck_mtx_lock(&vm_page_queue_free_lock);
4054 /*
4055 * reset our free page limit since we
4056 * dropped the lock protecting the vm_page_free_queue
4057 */
4058 free_available = vm_page_free_count - vm_page_free_reserved;
4059 goto retry;
4060 }
4061
4062 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4063
4064 if (wire == TRUE)
4065 m1->wire_count++;
4066 else
4067 m1->gobbled = TRUE;
4068 }
4069 if (wire == FALSE)
4070 vm_page_gobble_count += npages;
4071
4072 /*
4073 * gobbled pages are also counted as wired pages
4074 */
4075 vm_page_wire_count += npages;
4076
4077 assert(vm_page_verify_contiguous(m, npages));
4078 }
4079 done_scanning:
4080 vm_page_unlock_queues();
4081
4082 #if DEBUG
4083 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4084
4085 tv_end_sec -= tv_start_sec;
4086 if (tv_end_usec < tv_start_usec) {
4087 tv_end_sec--;
4088 tv_end_usec += 1000000;
4089 }
4090 tv_end_usec -= tv_start_usec;
4091 if (tv_end_usec >= 1000000) {
4092 tv_end_sec++;
4093 tv_end_usec -= 1000000;
4094 }
4095 if (vm_page_find_contig_debug) {
4096 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4097 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4098 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4099 scanned, yielded, dumped_run, stolen_pages);
4100 }
4101
4102 #endif
4103 #if MACH_ASSERT
4104 vm_page_verify_free_lists();
4105 #endif
4106 return m;
4107 }
4108
4109 /*
4110 * Allocate a list of contiguous, wired pages.
4111 */
4112 kern_return_t
4113 cpm_allocate(
4114 vm_size_t size,
4115 vm_page_t *list,
4116 ppnum_t max_pnum,
4117 ppnum_t pnum_mask,
4118 boolean_t wire,
4119 int flags)
4120 {
4121 vm_page_t pages;
4122 unsigned int npages;
4123
4124 if (size % PAGE_SIZE != 0)
4125 return KERN_INVALID_ARGUMENT;
4126
4127 npages = (unsigned int) (size / PAGE_SIZE);
4128 if (npages != size / PAGE_SIZE) {
4129 /* 32-bit overflow */
4130 return KERN_INVALID_ARGUMENT;
4131 }
4132
4133 /*
4134 * Obtain a pointer to a subset of the free
4135 * list large enough to satisfy the request;
4136 * the region will be physically contiguous.
4137 */
4138 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4139
4140 if (pages == VM_PAGE_NULL)
4141 return KERN_NO_SPACE;
4142 /*
4143 * determine need for wakeups
4144 */
4145 if ((vm_page_free_count < vm_page_free_min) ||
4146 ((vm_page_free_count < vm_page_free_target) &&
4147 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4148 thread_wakeup((event_t) &vm_page_free_wanted);
4149
4150 VM_CHECK_MEMORYSTATUS;
4151
4152 /*
4153 * The CPM pages should now be available and
4154 * ordered by ascending physical address.
4155 */
4156 assert(vm_page_verify_contiguous(pages, npages));
4157
4158 *list = pages;
4159 return KERN_SUCCESS;
4160 }
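
/*
 * Compiled-out sketch of a cpm_allocate() caller; the page count and the
 * zero constraints below are hypothetical example values, not taken from
 * a real caller. size must be a whole number of pages, and on success
 * *list is a chain of pages ordered by ascending physical address.
 */
#if 0
static kern_return_t
cpm_allocate_example(void)
{
vm_page_t pages;
kern_return_t kr;

kr = cpm_allocate((vm_size_t)(16 * PAGE_SIZE), /* 16 contiguous pages (example) */
&pages, /* returned page list */
(ppnum_t) 0, /* max_pnum (example value) */
(ppnum_t) 0, /* pnum_mask (example value) */
TRUE, /* wire the pages */
0); /* flags */
return (kr);
}
#endif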
4161
4162
4163 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4164
4165 /*
4166 * when working on a 'run' of pages, it is necessary to hold
4167 * the vm_page_queue_lock (a hot global lock) for certain operations
4168 * on the page... however, the majority of the work can be done
4169 * while merely holding the object lock... in fact there are certain
4170 * collections of pages that don't require any work brokered by the
4171 * vm_page_queue_lock... to mitigate the time spent behind the global
4172 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4173 * while doing all of the work that doesn't require the vm_page_queue_lock...
4174 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4175 * necessary work for each page... we will grab the busy bit on the page
4176 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4177 * if it can't immediately take the vm_page_queue_lock in order to compete
4178 * for the locks in the same order that vm_pageout_scan takes them.
4179 * the operation names are modeled after the names of the routines that
4180 * need to be called in order to make the changes very obvious in the
4181 * original loop
4182 */
4183
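/*
 * Compiled-out sketch of the calling pattern described above; the batch
 * size, the DW_ masks chosen and the function name are illustrative
 * assumptions, not a real caller. The caller is assumed to already hold
 * the object lock, since vm_page_do_delayed_work may need to drop and
 * retake it to keep the lock ordering described below.
 */
#if 0
static void
vm_page_delayed_work_example(vm_object_t object, vm_page_t page_list)
{
struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
struct vm_page_delayed_work *dwp;
int dw_count;
vm_page_t m;

dwp = &dw_array[0];
dw_count = 0;

for (m = page_list; m != VM_PAGE_NULL; m = (vm_page_t) m->pageq.next) {
/*
 * per-page work that only needs the object lock goes here...
 * then record the queue-related work for later
 */
dwp->dw_m = m;
dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
dwp++;
dw_count++;

if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
/*
 * flush the batch... this routine takes (and drops)
 * the vm_page_queue_lock on our behalf
 */
vm_page_do_delayed_work(object, &dw_array[0], dw_count);
dwp = &dw_array[0];
dw_count = 0;
}
}
if (dw_count)
vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}
#endif
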
4184 void
4185 vm_page_do_delayed_work(
4186 vm_object_t object,
4187 struct vm_page_delayed_work *dwp,
4188 int dw_count)
4189 {
4190 int j;
4191 vm_page_t m;
4192 vm_page_t local_free_q = VM_PAGE_NULL;
4193 boolean_t dropped_obj_lock = FALSE;
4194
4195 /*
4196 * pageout_scan takes the vm_page_lock_queues first
4197 * then tries for the object lock... to avoid what
4198 * is effectively a lock inversion, we'll go to the
4199 * trouble of taking them in that same order... otherwise
4200 * if this object contains the majority of the pages resident
4201 * in the UBC (or a small set of large objects actively being
4202 * worked on contain the majority of the pages), we could
4203 * cause the pageout_scan thread to 'starve' in its attempt
4204 * to find pages to move to the free queue, since it has to
4205 * successfully acquire the object lock of any candidate page
4206 * before it can steal/clean it.
4207 */
4208 if (!vm_page_trylockspin_queues()) {
4209 vm_object_unlock(object);
4210
4211 vm_page_lockspin_queues();
4212
4213 for (j = 0; ; j++) {
4214 if (!vm_object_lock_avoid(object) &&
4215 _vm_object_lock_try(object))
4216 break;
4217 vm_page_unlock_queues();
4218 mutex_pause(j);
4219 vm_page_lockspin_queues();
4220 }
4221 dropped_obj_lock = TRUE;
4222 }
4223 for (j = 0; j < dw_count; j++, dwp++) {
4224
4225 m = dwp->dw_m;
4226
4227 if (dwp->dw_mask & DW_set_list_req_pending) {
4228 m->list_req_pending = TRUE;
4229
4230 if (dropped_obj_lock == TRUE) {
4231 /*
4232 * need to make sure anyone that might have
4233 * blocked on busy == TRUE when we dropped
4234 * the object lock gets a chance to re-evaluate
4235 * its state since we have several places
4236 * where we avoid potential deadlocks with
4237 * the filesystem by stealing pages with
4238 * list_req_pending == TRUE and busy == TRUE
4239 */
4240 dwp->dw_mask |= DW_PAGE_WAKEUP;
4241 }
4242 }
4243 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4244 vm_pageout_throttle_up(m);
4245
4246 if (dwp->dw_mask & DW_vm_page_wire)
4247 vm_page_wire(m);
4248 else if (dwp->dw_mask & DW_vm_page_unwire) {
4249 boolean_t queueit;
4250
4251 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4252
4253 vm_page_unwire(m, queueit);
4254 }
4255 if (dwp->dw_mask & DW_vm_page_free) {
4256 vm_page_free_prepare_queues(m);
4257
4258 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4259 /*
4260 * Add this page to our list of reclaimed pages,
4261 * to be freed later.
4262 */
4263 m->pageq.next = (queue_entry_t) local_free_q;
4264 local_free_q = m;
4265 } else {
4266 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4267 vm_page_deactivate_internal(m, FALSE);
4268 else if (dwp->dw_mask & DW_vm_page_activate) {
4269 if (m->active == FALSE) {
4270 vm_page_activate(m);
4271 }
4272 }
4273 else if (dwp->dw_mask & DW_vm_page_speculate)
4274 vm_page_speculate(m, TRUE);
4275 else if (dwp->dw_mask & DW_vm_page_lru)
4276 vm_page_lru(m);
4277 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
4278 VM_PAGE_QUEUES_REMOVE(m);
4279
4280 if (dwp->dw_mask & DW_set_reference)
4281 m->reference = TRUE;
4282 else if (dwp->dw_mask & DW_clear_reference)
4283 m->reference = FALSE;
4284
4285 if (dwp->dw_mask & DW_move_page) {
4286 VM_PAGE_QUEUES_REMOVE(m);
4287
4288 assert(!m->laundry);
4289 assert(m->object != kernel_object);
4290 assert(m->pageq.next == NULL &&
4291 m->pageq.prev == NULL);
4292
4293 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4294 }
4295 if (dwp->dw_mask & DW_clear_busy)
4296 m->busy = FALSE;
4297
4298 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4299 PAGE_WAKEUP(m);
4300 }
4301 }
4302 vm_page_unlock_queues();
4303
4304 if (local_free_q)
4305 vm_page_free_list(local_free_q, TRUE);
4306
4307 VM_CHECK_MEMORYSTATUS;
4308
4309 }
4310
4311
4312
4313
4314 void vm_check_memorystatus()
4315 {
4316 #if CONFIG_EMBEDDED
4317 static boolean_t in_critical = FALSE;
4318 static unsigned int last_memorystatus = 0;
4319 unsigned int pages_avail;
4320
4321 if (!kern_memorystatus_delta) {
4322 return;
4323 }
4324
4325 pages_avail = (vm_page_active_count +
4326 vm_page_inactive_count +
4327 vm_page_speculative_count +
4328 vm_page_free_count +
4329 (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
4330 if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
4331 (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
4332 (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
4333 kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
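/*
 * worked example (hypothetical numbers): with 512MB of max_mem
 * (131072 4K pages) and pages_avail == 32768, the level becomes
 * 32768 * 100 / 131072 == 25, i.e. roughly a quarter of memory
 * is still available
 */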
4334 last_memorystatus = pages_avail;
4335
4336 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4337
4338 in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
4339 }
4340 #endif
4341 }
4342
4343 kern_return_t
4344 vm_page_alloc_list(
4345 int page_count,
4346 int flags,
4347 vm_page_t *list)
4348 {
4349 vm_page_t lo_page_list = VM_PAGE_NULL;
4350 vm_page_t mem;
4351 int i;
4352
4353 if ( !(flags & KMA_LOMEM))
4354 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4355
4356 for (i = 0; i < page_count; i++) {
4357
4358 mem = vm_page_grablo();
4359
4360 if (mem == VM_PAGE_NULL) {
4361 if (lo_page_list)
4362 vm_page_free_list(lo_page_list, FALSE);
4363
4364 *list = VM_PAGE_NULL;
4365
4366 return (KERN_RESOURCE_SHORTAGE);
4367 }
4368 mem->pageq.next = (queue_entry_t) lo_page_list;
4369 lo_page_list = mem;
4370 }
4371 *list = lo_page_list;
4372
4373 return (KERN_SUCCESS);
4374 }
4375
4376 void
4377 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4378 {
4379 page->offset = offset;
4380 }
4381
4382 vm_page_t
4383 vm_page_get_next(vm_page_t page)
4384 {
4385 return ((vm_page_t) page->pageq.next);
4386 }
4387
4388 vm_object_offset_t
4389 vm_page_get_offset(vm_page_t page)
4390 {
4391 return (page->offset);
4392 }
4393
4394 ppnum_t
4395 vm_page_get_phys_page(vm_page_t page)
4396 {
4397 return (page->phys_page);
4398 }
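
/*
 * Compiled-out illustration of the accessors above: walking a page list
 * (for instance one returned by vm_page_alloc_list) and reading each
 * page's physical page number. The function name is hypothetical.
 */
#if 0
static void
vm_page_list_walk_example(vm_page_t list)
{
vm_page_t p;

for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
printf("page %p has phys_page 0x%x\n", p, vm_page_get_phys_page(p));
}
#endif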
4399
4400
4401 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4402
4403 #if HIBERNATION
4404
4405 static vm_page_t hibernate_gobble_queue;
4406
4407 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4408
4409 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4410 static int hibernate_flush_dirty_pages(void);
4411 static int hibernate_flush_queue(queue_head_t *, int);
4412 static void hibernate_dirty_page(vm_page_t);
4413
4414 void hibernate_flush_wait(void);
4415 void hibernate_mark_in_progress(void);
4416 void hibernate_clear_in_progress(void);
4417
4418
4419 struct hibernate_statistics {
4420 int hibernate_considered;
4421 int hibernate_reentered_on_q;
4422 int hibernate_found_dirty;
4423 int hibernate_skipped_cleaning;
4424 int hibernate_skipped_transient;
4425 int hibernate_skipped_precious;
4426 int hibernate_queue_nolock;
4427 int hibernate_queue_paused;
4428 int hibernate_throttled;
4429 int hibernate_throttle_timeout;
4430 int hibernate_drained;
4431 int hibernate_drain_timeout;
4432 int cd_lock_failed;
4433 int cd_found_precious;
4434 int cd_found_wired;
4435 int cd_found_busy;
4436 int cd_found_unusual;
4437 int cd_found_cleaning;
4438 int cd_found_laundry;
4439 int cd_found_dirty;
4440 int cd_local_free;
4441 int cd_total_free;
4442 int cd_vm_page_wire_count;
4443 int cd_pages;
4444 int cd_discarded;
4445 int cd_count_wire;
4446 } hibernate_stats;
4447
4448
4449
4450 static int
4451 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4452 {
4453 wait_result_t wait_result;
4454
4455 vm_page_lock_queues();
4456
4457 while (q->pgo_laundry) {
4458
4459 q->pgo_draining = TRUE;
4460
4461 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4462
4463 vm_page_unlock_queues();
4464
4465 wait_result = thread_block(THREAD_CONTINUE_NULL);
4466
4467 if (wait_result == THREAD_TIMED_OUT) {
4468 hibernate_stats.hibernate_drain_timeout++;
4469 return (1);
4470 }
4471 vm_page_lock_queues();
4472
4473 hibernate_stats.hibernate_drained++;
4474 }
4475 vm_page_unlock_queues();
4476
4477 return (0);
4478 }
4479
4480 static void
4481 hibernate_dirty_page(vm_page_t m)
4482 {
4483 vm_object_t object = m->object;
4484 struct vm_pageout_queue *q;
4485
4486 #if DEBUG
4487 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4488 #endif
4489 vm_object_lock_assert_exclusive(object);
4490
4491 /*
4492 * protect the object from collapse -
4493 * locking in the object's paging_offset.
4494 */
4495 vm_object_paging_begin(object);
4496
4497 m->list_req_pending = TRUE;
4498 m->cleaning = TRUE;
4499 m->busy = TRUE;
4500
4501 if (object->internal == TRUE)
4502 q = &vm_pageout_queue_internal;
4503 else
4504 q = &vm_pageout_queue_external;
4505
4506 /*
4507 * pgo_laundry count is tied to the laundry bit
4508 */
4509 m->laundry = TRUE;
4510 q->pgo_laundry++;
4511
4512 m->pageout_queue = TRUE;
4513 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4514
4515 if (q->pgo_idle == TRUE) {
4516 q->pgo_idle = FALSE;
4517 thread_wakeup((event_t) &q->pgo_pending);
4518 }
4519 }
4520
4521 static int
4522 hibernate_flush_queue(queue_head_t *q, int qcount)
4523 {
4524 vm_page_t m;
4525 vm_object_t l_object = NULL;
4526 vm_object_t m_object = NULL;
4527 int refmod_state = 0;
4528 int try_failed_count = 0;
4529 int retval = 0;
4530 int current_run = 0;
4531 struct vm_pageout_queue *iq;
4532 struct vm_pageout_queue *eq;
4533 struct vm_pageout_queue *tq;
4534
4535
4536 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4537
4538 iq = &vm_pageout_queue_internal;
4539 eq = &vm_pageout_queue_external;
4540
4541 vm_page_lock_queues();
4542
4543 while (qcount && !queue_empty(q)) {
4544
4545 if (current_run++ == 1000) {
4546 if (hibernate_should_abort()) {
4547 retval = 1;
4548 break;
4549 }
4550 current_run = 0;
4551 }
4552
4553 m = (vm_page_t) queue_first(q);
4554 m_object = m->object;
4555
4556 /*
4557 * check to see if we currently are working
4558 * with the same object... if so, we've
4559 * already got the lock
4560 */
4561 if (m_object != l_object) {
4562 /*
4563 * the object associated with candidate page is
4564 * different from the one we were just working
4565 * with... dump the lock if we still own it
4566 */
4567 if (l_object != NULL) {
4568 vm_object_unlock(l_object);
4569 l_object = NULL;
4570 }
4571 /*
4572 * Try to lock object; since we've already got the
4573 * page queues lock, we can only 'try' for this one.
4574 * if the 'try' fails, we need to do a mutex_pause
4575 * to allow the owner of the object lock a chance to
4576 * run...
4577 */
4578 if ( !vm_object_lock_try_scan(m_object)) {
4579
4580 if (try_failed_count > 20) {
4581 hibernate_stats.hibernate_queue_nolock++;
4582
4583 goto reenter_pg_on_q;
4584 }
4585 vm_pageout_scan_wants_object = m_object;
4586
4587 vm_page_unlock_queues();
4588 mutex_pause(try_failed_count++);
4589 vm_page_lock_queues();
4590
4591 hibernate_stats.hibernate_queue_paused++;
4592 continue;
4593 } else {
4594 l_object = m_object;
4595 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4596 }
4597 }
4598 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4599 /*
4600 * page is not to be cleaned
4601 * put it back on the head of its queue
4602 */
4603 if (m->cleaning)
4604 hibernate_stats.hibernate_skipped_cleaning++;
4605 else
4606 hibernate_stats.hibernate_skipped_transient++;
4607
4608 goto reenter_pg_on_q;
4609 }
4610 if ( !m_object->pager_initialized && m_object->pager_created)
4611 goto reenter_pg_on_q;
4612
4613 if (m_object->copy == VM_OBJECT_NULL) {
4614 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4615 /*
4616 * let the normal hibernate image path
4617 * deal with these
4618 */
4619 goto reenter_pg_on_q;
4620 }
4621 }
4622 if ( !m->dirty && m->pmapped) {
4623 refmod_state = pmap_get_refmod(m->phys_page);
4624
4625 if ((refmod_state & VM_MEM_MODIFIED))
4626 m->dirty = TRUE;
4627 } else
4628 refmod_state = 0;
4629
4630 if ( !m->dirty) {
4631 /*
4632 * page is not to be cleaned
4633 * put it back on the head of its queue
4634 */
4635 if (m->precious)
4636 hibernate_stats.hibernate_skipped_precious++;
4637
4638 goto reenter_pg_on_q;
4639 }
4640 tq = NULL;
4641
4642 if (m_object->internal) {
4643 if (VM_PAGE_Q_THROTTLED(iq))
4644 tq = iq;
4645 } else if (VM_PAGE_Q_THROTTLED(eq))
4646 tq = eq;
4647
4648 if (tq != NULL) {
4649 wait_result_t wait_result;
4650 int wait_count = 5;
4651
4652 if (l_object != NULL) {
4653 vm_object_unlock(l_object);
4654 l_object = NULL;
4655 }
4656 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4657
4658 tq->pgo_throttled = TRUE;
4659
4660 while (retval == 0) {
4661
4662 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4663
4664 vm_page_unlock_queues();
4665
4666 wait_result = thread_block(THREAD_CONTINUE_NULL);
4667
4668 vm_page_lock_queues();
4669
4670 if (hibernate_should_abort())
4671 retval = 1;
4672
4673 if (wait_result != THREAD_TIMED_OUT)
4674 break;
4675
4676 if (--wait_count == 0) {
4677 hibernate_stats.hibernate_throttle_timeout++;
4678 retval = 1;
4679 }
4680 }
4681 if (retval)
4682 break;
4683
4684 hibernate_stats.hibernate_throttled++;
4685
4686 continue;
4687 }
4688 VM_PAGE_QUEUES_REMOVE(m);
4689
4690 hibernate_dirty_page(m);
4691
4692 hibernate_stats.hibernate_found_dirty++;
4693
4694 goto next_pg;
4695
4696 reenter_pg_on_q:
4697 queue_remove(q, m, vm_page_t, pageq);
4698 queue_enter(q, m, vm_page_t, pageq);
4699
4700 hibernate_stats.hibernate_reentered_on_q++;
4701 next_pg:
4702 hibernate_stats.hibernate_considered++;
4703
4704 qcount--;
4705 try_failed_count = 0;
4706 }
4707 if (l_object != NULL) {
4708 vm_object_unlock(l_object);
4709 l_object = NULL;
4710 }
4711 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4712
4713 vm_page_unlock_queues();
4714
4715 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4716
4717 return (retval);
4718 }
4719
4720
4721 static int
4722 hibernate_flush_dirty_pages()
4723 {
4724 struct vm_speculative_age_q *aq;
4725 uint32_t i;
4726
4727 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4728
4729 if (vm_page_local_q) {
4730 for (i = 0; i < vm_page_local_q_count; i++)
4731 vm_page_reactivate_local(i, TRUE, FALSE);
4732 }
4733
4734 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4735 int qcount;
4736 vm_page_t m;
4737
4738 aq = &vm_page_queue_speculative[i];
4739
4740 if (queue_empty(&aq->age_q))
4741 continue;
4742 qcount = 0;
4743
4744 vm_page_lockspin_queues();
4745
4746 queue_iterate(&aq->age_q,
4747 m,
4748 vm_page_t,
4749 pageq)
4750 {
4751 qcount++;
4752 }
4753 vm_page_unlock_queues();
4754
4755 if (qcount) {
4756 if (hibernate_flush_queue(&aq->age_q, qcount))
4757 return (1);
4758 }
4759 }
4760 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4761 return (1);
4762 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4763 return (1);
4764 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4765 return (1);
4766
4767 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4768 return (1);
4769 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4770 }
4771
4772
4773 extern void IOSleep(unsigned int);
4774 extern int sync_internal(void);
4775
4776 int
4777 hibernate_flush_memory()
4778 {
4779 int retval;
4780
4781 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4782
4783 IOSleep(2 * 1000);
4784
4785 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4786
4787 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4788 if (consider_buffer_cache_collect != NULL) {
4789
4790 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4791
4792 sync_internal();
4793 (void)(*consider_buffer_cache_collect)(1);
4794 consider_zone_gc(TRUE);
4795
4796 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4797 }
4798 }
4799 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4800
4801 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4802 hibernate_stats.hibernate_considered,
4803 hibernate_stats.hibernate_reentered_on_q,
4804 hibernate_stats.hibernate_found_dirty);
4805 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4806 hibernate_stats.hibernate_skipped_cleaning,
4807 hibernate_stats.hibernate_skipped_transient,
4808 hibernate_stats.hibernate_skipped_precious,
4809 hibernate_stats.hibernate_queue_nolock);
4810 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4811 hibernate_stats.hibernate_queue_paused,
4812 hibernate_stats.hibernate_throttled,
4813 hibernate_stats.hibernate_throttle_timeout,
4814 hibernate_stats.hibernate_drained,
4815 hibernate_stats.hibernate_drain_timeout);
4816
4817 return (retval);
4818 }
4819
4820
4821 static void
4822 hibernate_page_list_zero(hibernate_page_list_t *list)
4823 {
4824 uint32_t bank;
4825 hibernate_bitmap_t * bitmap;
4826
4827 bitmap = &list->bank_bitmap[0];
4828 for (bank = 0; bank < list->bank_count; bank++)
4829 {
4830 uint32_t last_bit;
4831
4832 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4833 // set out-of-bound bits at end of bitmap.
4834 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4835 if (last_bit)
4836 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
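// e.g. (hypothetical bank): with 45 pages in a bank, last_bit is
// 45 & 31 == 13 and the final word becomes 0xFFFFFFFF >> 13, so its
// 19 unused bit positions are pre-set and never treated as pages
// that need saving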
4837
4838 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4839 }
4840 }
4841
4842 void
4843 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4844 {
4845 uint32_t i;
4846 vm_page_t m;
4847 uint64_t start, end, timeout, nsec;
4848 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4849 clock_get_uptime(&start);
4850
4851 for (i = 0; i < gobble_count; i++)
4852 {
4853 while (VM_PAGE_NULL == (m = vm_page_grab()))
4854 {
4855 clock_get_uptime(&end);
4856 if (end >= timeout)
4857 break;
4858 VM_PAGE_WAIT();
4859 }
4860 if (!m)
4861 break;
4862 m->busy = FALSE;
4863 vm_page_gobble(m);
4864
4865 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4866 hibernate_gobble_queue = m;
4867 }
4868
4869 clock_get_uptime(&end);
4870 absolutetime_to_nanoseconds(end - start, &nsec);
4871 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4872 }
4873
4874 void
4875 hibernate_free_gobble_pages(void)
4876 {
4877 vm_page_t m, next;
4878 uint32_t count = 0;
4879
4880 m = (vm_page_t) hibernate_gobble_queue;
4881 while(m)
4882 {
4883 next = (vm_page_t) m->pageq.next;
4884 vm_page_free(m);
4885 count++;
4886 m = next;
4887 }
4888 hibernate_gobble_queue = VM_PAGE_NULL;
4889
4890 if (count)
4891 HIBLOG("Freed %d pages\n", count);
4892 }
4893
4894 static boolean_t
4895 hibernate_consider_discard(vm_page_t m)
4896 {
4897 vm_object_t object = NULL;
4898 int refmod_state;
4899 boolean_t discard = FALSE;
4900
4901 do
4902 {
4903 if (m->private)
4904 panic("hibernate_consider_discard: private");
4905
4906 if (!vm_object_lock_try(m->object)) {
4907 hibernate_stats.cd_lock_failed++;
4908 break;
4909 }
4910 object = m->object;
4911
4912 if (VM_PAGE_WIRED(m)) {
4913 hibernate_stats.cd_found_wired++;
4914 break;
4915 }
4916 if (m->precious) {
4917 hibernate_stats.cd_found_precious++;
4918 break;
4919 }
4920 if (m->busy || !object->alive) {
4921 /*
4922 * Somebody is playing with this page.
4923 */
4924 hibernate_stats.cd_found_busy++;
4925 break;
4926 }
4927 if (m->absent || m->unusual || m->error) {
4928 /*
4929 * If it's unusual in any way, ignore it
4930 */
4931 hibernate_stats.cd_found_unusual++;
4932 break;
4933 }
4934 if (m->cleaning) {
4935 hibernate_stats.cd_found_cleaning++;
4936 break;
4937 }
4938 if (m->laundry || m->list_req_pending) {
4939 hibernate_stats.cd_found_laundry++;
4940 break;
4941 }
4942 if (!m->dirty)
4943 {
4944 refmod_state = pmap_get_refmod(m->phys_page);
4945
4946 if (refmod_state & VM_MEM_REFERENCED)
4947 m->reference = TRUE;
4948 if (refmod_state & VM_MEM_MODIFIED)
4949 m->dirty = TRUE;
4950 }
4951
4952 /*
4953 * If it's clean or purgeable we can discard the page on wakeup.
4954 */
4955 discard = (!m->dirty)
4956 || (VM_PURGABLE_VOLATILE == object->purgable)
4957 || (VM_PURGABLE_EMPTY == object->purgable);
4958
4959 if (discard == FALSE)
4960 hibernate_stats.cd_found_dirty++;
4961 }
4962 while (FALSE);
4963
4964 if (object)
4965 vm_object_unlock(object);
4966
4967 return (discard);
4968 }
4969
4970
4971 static void
4972 hibernate_discard_page(vm_page_t m)
4973 {
4974 if (m->absent || m->unusual || m->error)
4975 /*
4976 * If it's unusual in any way, ignore
4977 */
4978 return;
4979
4980 if (m->pmapped == TRUE)
4981 {
4982 __unused int refmod_state = pmap_disconnect(m->phys_page);
4983 }
4984
4985 if (m->laundry)
4986 panic("hibernate_discard_page(%p) laundry", m);
4987 if (m->private)
4988 panic("hibernate_discard_page(%p) private", m);
4989 if (m->fictitious)
4990 panic("hibernate_discard_page(%p) fictitious", m);
4991
4992 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4993 {
4994 /* object should be on a queue */
4995 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4996 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4997 assert(old_queue);
4998 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4999 makes sure these locks are uncontended before sleep */
5000 vm_purgeable_token_delete_first(old_queue);
5001 m->object->purgable = VM_PURGABLE_EMPTY;
5002 }
5003
5004 vm_page_free(m);
5005 }
5006
5007 /*
5008 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5009 pages known to VM to not need saving are subtracted.
5010 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5011 */
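
/*
 * illustration only: hibernate_page_bitset(list, TRUE, pn) marks physical
 * page 'pn' as one that does not need saving in that bitmap, while a page
 * whose bit stays zero will be written to the image... the queue walks in
 * hibernate_page_list_setall() below apply that rule to the free queues
 * and to any page hibernate_consider_discard() says can be discarded
 */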
5012
5013 void
5014 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5015 hibernate_page_list_t * page_list_wired,
5016 hibernate_page_list_t * page_list_pal,
5017 uint32_t * pagesOut)
5018 {
5019 uint64_t start, end, nsec;
5020 vm_page_t m;
5021 uint32_t pages = page_list->page_count;
5022 uint32_t count_zf = 0, count_throttled = 0;
5023 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
5024 uint32_t count_wire = pages;
5025 uint32_t count_discard_active = 0;
5026 uint32_t count_discard_inactive = 0;
5027 uint32_t count_discard_purgeable = 0;
5028 uint32_t count_discard_speculative = 0;
5029 uint32_t i;
5030 uint32_t bank;
5031 hibernate_bitmap_t * bitmap;
5032 hibernate_bitmap_t * bitmap_wired;
5033
5034
5035 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5036
5037 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5038
5039 clock_get_uptime(&start);
5040
5041 hibernate_page_list_zero(page_list);
5042 hibernate_page_list_zero(page_list_wired);
5043 hibernate_page_list_zero(page_list_pal);
5044
5045 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5046 hibernate_stats.cd_pages = pages;
5047
5048 if (vm_page_local_q) {
5049 for (i = 0; i < vm_page_local_q_count; i++)
5050 vm_page_reactivate_local(i, TRUE, TRUE);
5051 }
5052
5053 m = (vm_page_t) hibernate_gobble_queue;
5054 while(m)
5055 {
5056 pages--;
5057 count_wire--;
5058 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5059 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5060 m = (vm_page_t) m->pageq.next;
5061 }
5062
5063 for( i = 0; i < real_ncpus; i++ )
5064 {
5065 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5066 {
5067 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5068 {
5069 pages--;
5070 count_wire--;
5071 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5072 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5073
5074 hibernate_stats.cd_local_free++;
5075 hibernate_stats.cd_total_free++;
5076 }
5077 }
5078 }
5079
5080 for( i = 0; i < vm_colors; i++ )
5081 {
5082 queue_iterate(&vm_page_queue_free[i],
5083 m,
5084 vm_page_t,
5085 pageq)
5086 {
5087 pages--;
5088 count_wire--;
5089 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5090 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5091
5092 hibernate_stats.cd_total_free++;
5093 }
5094 }
5095
5096 queue_iterate(&vm_lopage_queue_free,
5097 m,
5098 vm_page_t,
5099 pageq)
5100 {
5101 pages--;
5102 count_wire--;
5103 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5104 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5105
5106 hibernate_stats.cd_total_free++;
5107 }
5108
5109 queue_iterate( &vm_page_queue_throttled,
5110 m,
5111 vm_page_t,
5112 pageq )
5113 {
5114 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5115 && hibernate_consider_discard(m))
5116 {
5117 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5118 count_discard_inactive++;
5119 }
5120 else
5121 count_throttled++;
5122 count_wire--;
5123 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5124 }
5125
5126 queue_iterate( &vm_page_queue_zf,
5127 m,
5128 vm_page_t,
5129 pageq )
5130 {
5131 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5132 && hibernate_consider_discard(m))
5133 {
5134 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5135 if (m->dirty)
5136 count_discard_purgeable++;
5137 else
5138 count_discard_inactive++;
5139 }
5140 else
5141 count_zf++;
5142 count_wire--;
5143 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5144 }
5145
5146 queue_iterate( &vm_page_queue_inactive,
5147 m,
5148 vm_page_t,
5149 pageq )
5150 {
5151 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5152 && hibernate_consider_discard(m))
5153 {
5154 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5155 if (m->dirty)
5156 count_discard_purgeable++;
5157 else
5158 count_discard_inactive++;
5159 }
5160 else
5161 count_inactive++;
5162 count_wire--;
5163 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5164 }
5165
5166 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5167 {
5168 queue_iterate(&vm_page_queue_speculative[i].age_q,
5169 m,
5170 vm_page_t,
5171 pageq)
5172 {
5173 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5174 && hibernate_consider_discard(m))
5175 {
5176 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5177 count_discard_speculative++;
5178 }
5179 else
5180 count_speculative++;
5181 count_wire--;
5182 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5183 }
5184 }
5185
5186 queue_iterate( &vm_page_queue_active,
5187 m,
5188 vm_page_t,
5189 pageq )
5190 {
5191 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5192 && hibernate_consider_discard(m))
5193 {
5194 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5195 if (m->dirty)
5196 count_discard_purgeable++;
5197 else
5198 count_discard_active++;
5199 }
5200 else
5201 count_active++;
5202 count_wire--;
5203 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5204 }
5205
5206 // pull wired from hibernate_bitmap
5207
5208 bitmap = &page_list->bank_bitmap[0];
5209 bitmap_wired = &page_list_wired->bank_bitmap[0];
5210 for (bank = 0; bank < page_list->bank_count; bank++)
5211 {
5212 for (i = 0; i < bitmap->bitmapwords; i++)
5213 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5214 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5215 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5216 }
5217
5218 // machine dependent adjustments
5219 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5220
5221 hibernate_stats.cd_count_wire = count_wire;
5222 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5223
5224 clock_get_uptime(&end);
5225 absolutetime_to_nanoseconds(end - start, &nsec);
5226 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5227
5228 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5229 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5230 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5231
5232 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5233
5234 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5235 }
5236
5237 void
5238 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5239 {
5240 uint64_t start, end, nsec;
5241 vm_page_t m;
5242 vm_page_t next;
5243 uint32_t i;
5244 uint32_t count_discard_active = 0;
5245 uint32_t count_discard_inactive = 0;
5246 uint32_t count_discard_purgeable = 0;
5247 uint32_t count_discard_speculative = 0;
5248
5249 clock_get_uptime(&start);
5250
5251 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5252 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5253 {
5254 next = (vm_page_t) m->pageq.next;
5255 if (hibernate_page_bittst(page_list, m->phys_page))
5256 {
5257 if (m->dirty)
5258 count_discard_purgeable++;
5259 else
5260 count_discard_inactive++;
5261 hibernate_discard_page(m);
5262 }
5263 m = next;
5264 }
5265
5266 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5267 {
5268 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5269 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5270 {
5271 next = (vm_page_t) m->pageq.next;
5272 if (hibernate_page_bittst(page_list, m->phys_page))
5273 {
5274 count_discard_speculative++;
5275 hibernate_discard_page(m);
5276 }
5277 m = next;
5278 }
5279 }
5280
5281 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5282 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5283 {
5284 next = (vm_page_t) m->pageq.next;
5285 if (hibernate_page_bittst(page_list, m->phys_page))
5286 {
5287 if (m->dirty)
5288 count_discard_purgeable++;
5289 else
5290 count_discard_inactive++;
5291 hibernate_discard_page(m);
5292 }
5293 m = next;
5294 }
5295
5296 m = (vm_page_t) queue_first(&vm_page_queue_active);
5297 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5298 {
5299 next = (vm_page_t) m->pageq.next;
5300 if (hibernate_page_bittst(page_list, m->phys_page))
5301 {
5302 if (m->dirty)
5303 count_discard_purgeable++;
5304 else
5305 count_discard_active++;
5306 hibernate_discard_page(m);
5307 }
5308 m = next;
5309 }
5310
5311 clock_get_uptime(&end);
5312 absolutetime_to_nanoseconds(end - start, &nsec);
5313 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5314 nsec / 1000000ULL,
5315 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5316 }
5317
5318 #endif /* HIBERNATION */
5319
5320 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5321
5322 #include <mach_vm_debug.h>
5323 #if MACH_VM_DEBUG
5324
5325 #include <mach_debug/hash_info.h>
5326 #include <vm/vm_debug.h>
5327
5328 /*
5329 * Routine: vm_page_info
5330 * Purpose:
5331 * Return information about the global VP table.
5332 * Fills the buffer with as much information as possible
5333 * and returns the desired size of the buffer.
5334 * Conditions:
5335 * Nothing locked. The caller should provide
5336 * possibly-pageable memory.
5337 */
5338
5339 unsigned int
5340 vm_page_info(
5341 hash_info_bucket_t *info,
5342 unsigned int count)
5343 {
5344 unsigned int i;
5345 lck_spin_t *bucket_lock;
5346
5347 if (vm_page_bucket_count < count)
5348 count = vm_page_bucket_count;
5349
5350 for (i = 0; i < count; i++) {
5351 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5352 unsigned int bucket_count = 0;
5353 vm_page_t m;
5354
5355 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5356 lck_spin_lock(bucket_lock);
5357
5358 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5359 bucket_count++;
5360
5361 lck_spin_unlock(bucket_lock);
5362
5363 /* don't touch pageable memory while holding locks */
5364 info[i].hib_count = bucket_count;
5365 }
5366
5367 return vm_page_bucket_count;
5368 }
5369 #endif /* MACH_VM_DEBUG */
5370
5371 #include <mach_kdb.h>
5372 #if MACH_KDB
5373
5374 #include <ddb/db_output.h>
5375 #include <vm/vm_print.h>
5376 #define printf kdbprintf
5377
5378 /*
5379 * Routine: vm_page_print [exported]
5380 */
5381 void
5382 vm_page_print(
5383 db_addr_t db_addr)
5384 {
5385 vm_page_t p;
5386
5387 p = (vm_page_t) (long) db_addr;
5388
5389 iprintf("page 0x%x\n", p);
5390
5391 db_indent += 2;
5392
5393 iprintf("object=0x%x", p->object);
5394 printf(", offset=0x%x", p->offset);
5395 printf(", wire_count=%d", p->wire_count);
5396
5397 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5398 (p->local ? "" : "!"),
5399 (p->inactive ? "" : "!"),
5400 (p->active ? "" : "!"),
5401 (p->throttled ? "" : "!"),
5402 (p->gobbled ? "" : "!"),
5403 (p->laundry ? "" : "!"),
5404 (p->free ? "" : "!"),
5405 (p->reference ? "" : "!"),
5406 (p->encrypted ? "" : "!"));
5407 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5408 (p->busy ? "" : "!"),
5409 (p->wanted ? "" : "!"),
5410 (p->tabled ? "" : "!"),
5411 (p->fictitious ? "" : "!"),
5412 (p->private ? "" : "!"),
5413 (p->precious ? "" : "!"));
5414 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5415 (p->absent ? "" : "!"),
5416 (p->error ? "" : "!"),
5417 (p->dirty ? "" : "!"),
5418 (p->cleaning ? "" : "!"),
5419 (p->pageout ? "" : "!"),
5420 (p->clustered ? "" : "!"));
5421 iprintf("%soverwriting, %srestart, %sunusual\n",
5422 (p->overwriting ? "" : "!"),
5423 (p->restart ? "" : "!"),
5424 (p->unusual ? "" : "!"));
5425
5426 iprintf("phys_page=0x%x", p->phys_page);
5427
5428 db_indent -= 2;
5429 }
5430 #endif /* MACH_KDB */