apple/xnu.git: osfmk/vm/vm_resident.c (blob 397914b0c9c683ff305e734b810519476bb4bc98)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93
94 #include <IOKit/IOHibernatePrivate.h>
95
96
97 #include <sys/kern_memorystatus.h>
98
99 #include <sys/kdebug.h>
100
101 boolean_t vm_page_free_verify = TRUE;
102
103 uint32_t vm_lopage_free_count = 0;
104 uint32_t vm_lopage_free_limit = 0;
105 uint32_t vm_lopage_lowater = 0;
106 boolean_t vm_lopage_refill = FALSE;
107 boolean_t vm_lopage_needed = FALSE;
108
109 lck_mtx_ext_t vm_page_queue_lock_ext;
110 lck_mtx_ext_t vm_page_queue_free_lock_ext;
111 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
112
113 int speculative_age_index = 0;
114 int speculative_steal_index = 0;
115 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
116
117
118 __private_extern__ void vm_page_init_lck_grp(void);
119
120 static void vm_page_free_prepare(vm_page_t page);
121 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
122
123
124
125
126 /*
127 * Associated with each page of user-allocatable memory is a
128 * page structure.
129 */
130
131 /*
132 * These variables record the values returned by vm_page_bootstrap,
133 * for debugging purposes. The implementation of pmap_steal_memory
134 * and pmap_startup here also uses them internally.
135 */
136
137 vm_offset_t virtual_space_start;
138 vm_offset_t virtual_space_end;
139 int vm_page_pages;
140
141 /*
142 * The vm_page_lookup() routine, which provides for fast
143 * (virtual memory object, offset) to page lookup, employs
144 * the following hash table. The vm_page_{insert,remove}
145 * routines install and remove associations in the table.
146 * [This table is often called the virtual-to-physical,
147 * or VP, table.]
148 */
149 typedef struct {
150 vm_page_t pages;
151 #if MACH_PAGE_HASH_STATS
152 int cur_count; /* current count */
153 int hi_count; /* high water mark */
154 #endif /* MACH_PAGE_HASH_STATS */
155 } vm_page_bucket_t;
156
157
158 #define BUCKETS_PER_LOCK 16
159
160 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
161 unsigned int vm_page_bucket_count = 0; /* How big is array? */
162 unsigned int vm_page_hash_mask; /* Mask for hash function */
163 unsigned int vm_page_hash_shift; /* Shift for hash function */
164 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
165 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
166
167 lck_spin_t *vm_page_bucket_locks;
168
169
170 #if MACH_PAGE_HASH_STATS
171 /* This routine is only for debug. It is intended to be called by
172 * hand by a developer using a kernel debugger. This routine prints
173 * out vm_page_hash table statistics to the kernel debug console.
174 */
175 void
176 hash_debug(void)
177 {
178 int i;
179 int numbuckets = 0;
180 int highsum = 0;
181 int maxdepth = 0;
182
183 for (i = 0; i < vm_page_bucket_count; i++) {
184 if (vm_page_buckets[i].hi_count) {
185 numbuckets++;
186 highsum += vm_page_buckets[i].hi_count;
187 if (vm_page_buckets[i].hi_count > maxdepth)
188 maxdepth = vm_page_buckets[i].hi_count;
189 }
190 }
191 printf("Total number of buckets: %d\n", vm_page_bucket_count);
192 printf("Number used buckets: %d = %d%%\n",
193 numbuckets, 100*numbuckets/vm_page_bucket_count);
194 printf("Number unused buckets: %d = %d%%\n",
195 vm_page_bucket_count - numbuckets,
196 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
197 printf("Sum of bucket max depth: %d\n", highsum);
198 printf("Average bucket depth: %d.%2d\n",
199 highsum/vm_page_bucket_count,
200 highsum%vm_page_bucket_count);
201 printf("Maximum bucket depth: %d\n", maxdepth);
202 }
203 #endif /* MACH_PAGE_HASH_STATS */
204
205 /*
206 * The virtual page size is currently implemented as a runtime
207 * variable, but is constant once initialized using vm_set_page_size.
208 * This initialization must be done in the machine-dependent
209 * bootstrap sequence, before calling other machine-independent
210 * initializations.
211 *
212 * All references to the virtual page size outside this
213 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
214 * constants.
215 */
216 vm_size_t page_size = PAGE_SIZE;
217 vm_size_t page_mask = PAGE_MASK;
218 int page_shift = PAGE_SHIFT;
219
220 /*
221 * Resident page structures are initialized from
222 * a template (see vm_page_alloc).
223 *
224 * When adding a new field to the virtual memory
225 * object structure, be sure to add initialization
226 * (see vm_page_bootstrap).
227 */
228 struct vm_page vm_page_template;
229
230 vm_page_t vm_pages = VM_PAGE_NULL;
231 unsigned int vm_pages_count = 0;
232 ppnum_t vm_page_lowest = 0;
233
234 /*
235 * Resident pages that represent real memory
236 * are allocated from a set of free lists,
237 * one per color.
238 */
239 unsigned int vm_colors;
240 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
241 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
242 queue_head_t vm_page_queue_free[MAX_COLORS];
243 unsigned int vm_page_free_wanted;
244 unsigned int vm_page_free_wanted_privileged;
245 unsigned int vm_page_free_count;
246 unsigned int vm_page_fictitious_count;
247
248 unsigned int vm_page_free_count_minimum; /* debugging */
249
250 /*
251 * Occasionally, the virtual memory system uses
252 * resident page structures that do not refer to
253 * real pages, for example to leave a page with
254 * important state information in the VP table.
255 *
256 * These page structures are allocated the way
257 * most other kernel structures are.
258 */
259 zone_t vm_page_zone;
260 vm_locks_array_t vm_page_locks;
261 decl_lck_mtx_data(,vm_page_alloc_lock)
262 unsigned int io_throttle_zero_fill;
263
264 unsigned int vm_page_local_q_count = 0;
265 unsigned int vm_page_local_q_soft_limit = 250;
266 unsigned int vm_page_local_q_hard_limit = 500;
267 struct vplq *vm_page_local_q = NULL;
268
269 /*
270 * Fictitious pages don't have a physical address,
271 * but we must initialize phys_page to something.
272 * For debugging, this should be a strange value
273 * that the pmap module can recognize in assertions.
274 */
275 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
276
277 /*
278 * Guard pages are not accessible so they don't
279 * need a physical address, but we need to enter
280 * one in the pmap.
281 * Let's make it recognizable and make sure that
282 * we don't use a real physical page with that
283 * physical address.
284 */
285 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
286
287 /*
288 * Resident page structures are also chained on
289 * queues that are used by the page replacement
290 * system (pageout daemon). These queues are
291 * defined here, but are shared by the pageout
292 * module. The inactive queue is broken into
293 * inactive and zf for convenience as the
294 * pageout daemon often assigns a higher
295 * affinity to zf pages
296 */
297 queue_head_t vm_page_queue_active;
298 queue_head_t vm_page_queue_inactive;
299 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
300 queue_head_t vm_page_queue_throttled;
301
302 unsigned int vm_page_active_count;
303 unsigned int vm_page_inactive_count;
304 unsigned int vm_page_throttled_count;
305 unsigned int vm_page_speculative_count;
306 unsigned int vm_page_wire_count;
307 unsigned int vm_page_wire_count_initial;
308 unsigned int vm_page_gobble_count = 0;
309 unsigned int vm_page_wire_count_warning = 0;
310 unsigned int vm_page_gobble_count_warning = 0;
311
312 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
313 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
314 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
315
316 #if DEVELOPMENT || DEBUG
317 unsigned int vm_page_speculative_recreated = 0;
318 unsigned int vm_page_speculative_created = 0;
319 unsigned int vm_page_speculative_used = 0;
320 #endif
321
322 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
323 ppnum_t max_valid_low_ppnum = 0xffffffff;
324
325
326 /*
327 * Several page replacement parameters are also
328 * shared with this module, so that page allocation
329 * (done here in vm_page_alloc) can trigger the
330 * pageout daemon.
331 */
332 unsigned int vm_page_free_target = 0;
333 unsigned int vm_page_free_min = 0;
334 unsigned int vm_page_throttle_limit = 0;
335 uint32_t vm_page_creation_throttle = 0;
336 unsigned int vm_page_inactive_target = 0;
337 unsigned int vm_page_inactive_min = 0;
338 unsigned int vm_page_free_reserved = 0;
339 unsigned int vm_page_throttle_count = 0;
340
341 /*
342 * The VM system has a couple of heuristics for deciding
343 * that pages are "uninteresting" and should be placed
344 * on the inactive queue as likely candidates for replacement.
345 * These variables let the heuristics be controlled at run-time
346 * to make experimentation easier.
347 */
348
349 boolean_t vm_page_deactivate_hint = TRUE;
350
351 struct vm_page_stats_reusable vm_page_stats_reusable;
352
353 /*
354 * vm_set_page_size:
355 *
356 * Sets the page size, perhaps based upon the memory
357 * size. Must be called before any use of page-size
358 * dependent functions.
359 *
360 * Sets page_shift and page_mask from page_size.
361 */
362 void
363 vm_set_page_size(void)
364 {
365 page_mask = page_size - 1;
366
367 if ((page_mask & page_size) != 0)
368 panic("vm_set_page_size: page size not a power of two");
369
370 for (page_shift = 0; ; page_shift++)
371 if ((1U << page_shift) == page_size)
372 break;
373 }
374
375
376 /* Called once during startup, once the cache geometry is known.
377 */
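/*
 * Physical pages are binned into vm_colors free queues by
 * (phys_page & vm_color_mask); spreading allocations across colors
 * keeps them distributed over the cache's index sets.
 */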
378 static void
379 vm_page_set_colors( void )
380 {
381 unsigned int n, override;
382
383 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
384 n = override;
385 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
386 n = vm_cache_geometry_colors;
387 else n = DEFAULT_COLORS; /* use default if all else fails */
388
389 if ( n == 0 )
390 n = 1;
391 if ( n > MAX_COLORS )
392 n = MAX_COLORS;
393
394 /* the count must be a power of 2 */
395 if ( ( n & (n - 1)) != 0 )
396 panic("vm_page_set_colors");
397
398 vm_colors = n;
399 vm_color_mask = n - 1;
400 }
401
402
403 lck_grp_t vm_page_lck_grp_free;
404 lck_grp_t vm_page_lck_grp_queue;
405 lck_grp_t vm_page_lck_grp_local;
406 lck_grp_t vm_page_lck_grp_purge;
407 lck_grp_t vm_page_lck_grp_alloc;
408 lck_grp_t vm_page_lck_grp_bucket;
409 lck_grp_attr_t vm_page_lck_grp_attr;
410 lck_attr_t vm_page_lck_attr;
411
412
413 __private_extern__ void
414 vm_page_init_lck_grp(void)
415 {
416 /*
417 * initialize the vm_page lock world
418 */
419 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
426 lck_attr_setdefault(&vm_page_lck_attr);
427 }
428
429 void
430 vm_page_init_local_q()
431 {
432 unsigned int num_cpus;
433 unsigned int i;
434 struct vplq *t_local_q;
435
436 num_cpus = ml_get_max_cpus();
437
438 /*
439 * no point in this for a uni-processor system
440 */
441 if (num_cpus >= 2) {
442 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
443
444 for (i = 0; i < num_cpus; i++) {
445 struct vpl *lq;
446
447 lq = &t_local_q[i].vpl_un.vpl;
448 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
449 queue_init(&lq->vpl_queue);
450 lq->vpl_count = 0;
451 }
452 vm_page_local_q_count = num_cpus;
453
454 vm_page_local_q = (struct vplq *)t_local_q;
455 }
456 }
457
458
459 /*
460 * vm_page_bootstrap:
461 *
462 * Initializes the resident memory module.
463 *
464 * Allocates memory for the page cells, and
465 * for the object/offset-to-page hash table headers.
466 * Each page cell is initialized and placed on the free list.
467 * Returns the range of available kernel virtual memory.
468 */
469
470 void
471 vm_page_bootstrap(
472 vm_offset_t *startp,
473 vm_offset_t *endp)
474 {
475 register vm_page_t m;
476 unsigned int i;
477 unsigned int log1;
478 unsigned int log2;
479 unsigned int size;
480
481 /*
482 * Initialize the vm_page template.
483 */
484
485 m = &vm_page_template;
486 bzero(m, sizeof (*m));
487
488 m->pageq.next = NULL;
489 m->pageq.prev = NULL;
490 m->listq.next = NULL;
491 m->listq.prev = NULL;
492 m->next = VM_PAGE_NULL;
493
494 m->object = VM_OBJECT_NULL; /* reset later */
495 m->offset = (vm_object_offset_t) -1; /* reset later */
496
497 m->wire_count = 0;
498 m->local = FALSE;
499 m->inactive = FALSE;
500 m->active = FALSE;
501 m->pageout_queue = FALSE;
502 m->speculative = FALSE;
503 m->laundry = FALSE;
504 m->free = FALSE;
505 m->reference = FALSE;
506 m->gobbled = FALSE;
507 m->private = FALSE;
508 m->throttled = FALSE;
509 m->__unused_pageq_bits = 0;
510
511 m->phys_page = 0; /* reset later */
512
513 m->busy = TRUE;
514 m->wanted = FALSE;
515 m->tabled = FALSE;
516 m->fictitious = FALSE;
517 m->pmapped = FALSE;
518 m->wpmapped = FALSE;
519 m->pageout = FALSE;
520 m->absent = FALSE;
521 m->error = FALSE;
522 m->dirty = FALSE;
523 m->cleaning = FALSE;
524 m->precious = FALSE;
525 m->clustered = FALSE;
526 m->overwriting = FALSE;
527 m->restart = FALSE;
528 m->unusual = FALSE;
529 m->encrypted = FALSE;
530 m->encrypted_cleaning = FALSE;
531 m->list_req_pending = FALSE;
532 m->dump_cleaning = FALSE;
533 m->cs_validated = FALSE;
534 m->cs_tainted = FALSE;
535 m->no_cache = FALSE;
536 m->zero_fill = FALSE;
537 m->reusable = FALSE;
538 m->slid = FALSE;
539 m->__unused_object_bits = 0;
540
541
542 /*
543 * Initialize the page queues.
544 */
545 vm_page_init_lck_grp();
546
547 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
548 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
549 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
550
551 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
552 int group;
553
554 purgeable_queues[i].token_q_head = 0;
555 purgeable_queues[i].token_q_tail = 0;
556 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
557 queue_init(&purgeable_queues[i].objq[group]);
558
559 purgeable_queues[i].type = i;
560 purgeable_queues[i].new_pages = 0;
561 #if MACH_ASSERT
562 purgeable_queues[i].debug_count_tokens = 0;
563 purgeable_queues[i].debug_count_objects = 0;
564 #endif
565 };
566
567 for (i = 0; i < MAX_COLORS; i++ )
568 queue_init(&vm_page_queue_free[i]);
569
570 queue_init(&vm_lopage_queue_free);
571 queue_init(&vm_page_queue_active);
572 queue_init(&vm_page_queue_inactive);
573 queue_init(&vm_page_queue_throttled);
574 queue_init(&vm_page_queue_zf);
575
576 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
577 queue_init(&vm_page_queue_speculative[i].age_q);
578
579 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
580 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
581 }
582 vm_page_free_wanted = 0;
583 vm_page_free_wanted_privileged = 0;
584
585 vm_page_set_colors();
586
587
588 /*
589 * Steal memory for the map and zone subsystems.
590 */
591
592 vm_map_steal_memory();
593 zone_steal_memory();
594
595 /*
596 * Allocate (and initialize) the virtual-to-physical
597 * table hash buckets.
598 *
599 * The number of buckets should be a power of two to
600 * get a good hash function. The following computation
601 * chooses the first power of two that is greater
602 * than the number of physical pages in the system.
603 */
604
605 if (vm_page_bucket_count == 0) {
606 unsigned int npages = pmap_free_pages();
607
608 vm_page_bucket_count = 1;
609 while (vm_page_bucket_count < npages)
610 vm_page_bucket_count <<= 1;
611 }
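/*
 * One spin lock covers each group of BUCKETS_PER_LOCK consecutive hash
 * buckets; round up so a final partial group also gets a lock.
 */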
612 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
613
614 vm_page_hash_mask = vm_page_bucket_count - 1;
615
616 /*
617 * Calculate object shift value for hashing algorithm:
618 * O = log2(sizeof(struct vm_object))
619 * B = log2(vm_page_bucket_count)
620 * hash shifts the object left by
621 * B/2 - O
622 */
623 size = vm_page_bucket_count;
624 for (log1 = 0; size > 1; log1++)
625 size /= 2;
626 size = sizeof(struct vm_object);
627 for (log2 = 0; size > 1; log2++)
628 size /= 2;
629 vm_page_hash_shift = log1/2 - log2 + 1;
630
631 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
632 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
633 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
634
635 if (vm_page_hash_mask & vm_page_bucket_count)
636 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
637
638 vm_page_buckets = (vm_page_bucket_t *)
639 pmap_steal_memory(vm_page_bucket_count *
640 sizeof(vm_page_bucket_t));
641
642 vm_page_bucket_locks = (lck_spin_t *)
643 pmap_steal_memory(vm_page_bucket_lock_count *
644 sizeof(lck_spin_t));
645
646 for (i = 0; i < vm_page_bucket_count; i++) {
647 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
648
649 bucket->pages = VM_PAGE_NULL;
650 #if MACH_PAGE_HASH_STATS
651 bucket->cur_count = 0;
652 bucket->hi_count = 0;
653 #endif /* MACH_PAGE_HASH_STATS */
654 }
655
656 for (i = 0; i < vm_page_bucket_lock_count; i++)
657 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
658
659 /*
660 * Machine-dependent code allocates the resident page table.
661 * It uses vm_page_init to initialize the page frames.
662 * The code also returns to us the virtual space available
663 * to the kernel. We don't trust the pmap module
664 * to get the alignment right.
665 */
666
667 pmap_startup(&virtual_space_start, &virtual_space_end);
668 virtual_space_start = round_page(virtual_space_start);
669 virtual_space_end = trunc_page(virtual_space_end);
670
671 *startp = virtual_space_start;
672 *endp = virtual_space_end;
673
674 /*
675 * Compute the initial "wire" count.
676 * Up until now, the pages which have been set aside are not under
677 * the VM system's control, so although they aren't explicitly
678 * wired, they nonetheless can't be moved. At this moment,
679 * all VM managed pages are "free", courtesy of pmap_startup.
680 */
681 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
682 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
683 vm_page_wire_count_initial = vm_page_wire_count;
684 vm_page_free_count_minimum = vm_page_free_count;
685
686 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
687 vm_page_free_count, vm_page_wire_count);
688
689 simple_lock_init(&vm_paging_lock, 0);
690 }
691
692 #ifndef MACHINE_PAGES
693 /*
694 * We implement pmap_steal_memory and pmap_startup with the help
695 * of two simpler functions, pmap_virtual_space and pmap_next_page.
696 */
697
698 void *
699 pmap_steal_memory(
700 vm_size_t size)
701 {
702 vm_offset_t addr, vaddr;
703 ppnum_t phys_page;
704
705 /*
706 * Round the size up to a multiple of the pointer size.
707 */
708
709 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
710
711 /*
712 * If this is the first call to pmap_steal_memory,
713 * we have to initialize ourself.
714 */
715
716 if (virtual_space_start == virtual_space_end) {
717 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
718
719 /*
720 * The initial values must be aligned properly, and
721 * we don't trust the pmap module to do it right.
722 */
723
724 virtual_space_start = round_page(virtual_space_start);
725 virtual_space_end = trunc_page(virtual_space_end);
726 }
727
728 /*
729 * Allocate virtual memory for this request.
730 */
731
732 addr = virtual_space_start;
733 virtual_space_start += size;
734
735 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
736
737 /*
738 * Allocate and map physical pages to back new virtual pages.
739 */
740
741 for (vaddr = round_page(addr);
742 vaddr < addr + size;
743 vaddr += PAGE_SIZE) {
744
745 if (!pmap_next_page_hi(&phys_page))
746 panic("pmap_steal_memory");
747
748 /*
749 * XXX Logically, these mappings should be wired,
750 * but some pmap modules barf if they are.
751 */
752 #if defined(__LP64__)
753 pmap_pre_expand(kernel_pmap, vaddr);
754 #endif
755
756 pmap_enter(kernel_pmap, vaddr, phys_page,
757 VM_PROT_READ|VM_PROT_WRITE,
758 VM_WIMG_USE_DEFAULT, FALSE);
759 /*
760 * Account for newly stolen memory
761 */
762 vm_page_wire_count++;
763
764 }
765
766 return (void *) addr;
767 }
768
769 void
770 pmap_startup(
771 vm_offset_t *startp,
772 vm_offset_t *endp)
773 {
774 unsigned int i, npages, pages_initialized, fill, fillval;
775 ppnum_t phys_page;
776 addr64_t tmpaddr;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
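/*
 * Each page frame costs PAGE_SIZE bytes of memory plus one vm_page
 * structure, so dividing by their sum leaves room for both the array
 * of vm_page structures and the pages it will describe.
 */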
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795 if (pages_initialized == 0 || phys_page < vm_page_lowest)
796 vm_page_lowest = phys_page;
797
798 vm_page_init(&vm_pages[i], phys_page, FALSE);
799 vm_page_pages++;
800 pages_initialized++;
801 }
802 vm_pages_count = pages_initialized;
803
804 /*
805 * Check if we want to initialize pages to a known value
806 */
807 fill = 0; /* Assume no fill */
808 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
809
810 // -debug code remove
811 if (2 == vm_himemory_mode) {
812 // free low -> high so high is preferred
813 for (i = 1; i <= pages_initialized; i++) {
814 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
815 vm_page_release(&vm_pages[i - 1]);
816 }
817 }
818 else
819 // debug code remove-
820
821 /*
822 * Release pages in reverse order so that physical pages
823 * initially get allocated in ascending addresses. This keeps
824 * the devices (which must address physical memory) happy if
825 * they require several consecutive pages.
826 */
827 for (i = pages_initialized; i > 0; i--) {
828 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
829 vm_page_release(&vm_pages[i - 1]);
830 }
831
832 #if 0
833 {
834 vm_page_t xx, xxo, xxl;
835 int i, j, k, l;
836
837 j = 0; /* (BRINGUP) */
838 xxl = 0;
839
840 for( i = 0; i < vm_colors; i++ ) {
841 queue_iterate(&vm_page_queue_free[i],
842 xx,
843 vm_page_t,
844 pageq) { /* BRINGUP */
845 j++; /* (BRINGUP) */
846 if(j > vm_page_free_count) { /* (BRINGUP) */
847 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
848 }
849
850 l = vm_page_free_count - j; /* (BRINGUP) */
851 k = 0; /* (BRINGUP) */
852
853 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
854
855 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
856 k++;
857 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
858 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
859 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
860 }
861 }
862
863 xxl = xx;
864 }
865 }
866
867 if(j != vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
869 }
870 }
871 #endif
872
873
874 /*
875 * We have to re-align virtual_space_start,
876 * because pmap_steal_memory has been using it.
877 */
878
879 virtual_space_start = round_page(virtual_space_start);
880
881 *startp = virtual_space_start;
882 *endp = virtual_space_end;
883 }
884 #endif /* MACHINE_PAGES */
885
886 /*
887 * Routine: vm_page_module_init
888 * Purpose:
889 * Second initialization pass, to be done after
890 * the basic VM system is ready.
891 */
892 void
893 vm_page_module_init(void)
894 {
895 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
896 0, PAGE_SIZE, "vm pages");
897
898 #if ZONE_DEBUG
899 zone_debug_disable(vm_page_zone);
900 #endif /* ZONE_DEBUG */
901
902 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
903 zone_change(vm_page_zone, Z_EXPAND, FALSE);
904 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
905 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
906
907 /*
908 * Adjust zone statistics to account for the real pages allocated
909 * in vm_page_create(). [Q: is this really what we want?]
910 */
911 vm_page_zone->count += vm_page_pages;
912 vm_page_zone->sum_count += vm_page_pages;
913 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
914
915 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
916 }
917
918 /*
919 * Routine: vm_page_create
920 * Purpose:
921 * After the VM system is up, machine-dependent code
922 * may stumble across more physical memory. For example,
923 * memory that it was reserving for a frame buffer.
924 * vm_page_create turns this memory into available pages.
925 */
926
927 void
928 vm_page_create(
929 ppnum_t start,
930 ppnum_t end)
931 {
932 ppnum_t phys_page;
933 vm_page_t m;
934
935 for (phys_page = start;
936 phys_page < end;
937 phys_page++) {
938 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
939 == VM_PAGE_NULL)
940 vm_page_more_fictitious();
941
942 m->fictitious = FALSE;
943 pmap_clear_noencrypt(phys_page);
944
945 vm_page_pages++;
946 vm_page_release(m);
947 }
948 }
949
950 /*
951 * vm_page_hash:
952 *
953 * Distributes the object/offset key pair among hash buckets.
954 *
955 * NOTE: The bucket count must be a power of 2
956 */
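/*
 * The object pointer is scattered by multiplying it by
 * vm_page_bucket_hash (an odd constant derived from the table size in
 * vm_page_bootstrap), the page index of the offset is XORed with the
 * same constant and added in, and the result is masked down to the
 * bucket range.
 */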
957 #define vm_page_hash(object, offset) (\
958 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
959 & vm_page_hash_mask)
960
961
962 /*
963 * vm_page_insert: [ internal use only ]
964 *
965 * Inserts the given mem entry into the object/object-page
966 * table and object list.
967 *
968 * The object must be locked.
969 */
970 void
971 vm_page_insert(
972 vm_page_t mem,
973 vm_object_t object,
974 vm_object_offset_t offset)
975 {
976 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
977 }
978
979 void
980 vm_page_insert_internal(
981 vm_page_t mem,
982 vm_object_t object,
983 vm_object_offset_t offset,
984 boolean_t queues_lock_held,
985 boolean_t insert_in_hash)
986 {
987 vm_page_bucket_t *bucket;
988 lck_spin_t *bucket_lock;
989 int hash_id;
990
991 XPR(XPR_VM_PAGE,
992 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
993 object, offset, mem, 0,0);
994
995 VM_PAGE_CHECK(mem);
996
997 if (object == vm_submap_object) {
998 /* the vm_submap_object is only a placeholder for submaps */
999 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1000 }
1001
1002 vm_object_lock_assert_exclusive(object);
1003 #if DEBUG
1004 lck_mtx_assert(&vm_page_queue_lock,
1005 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1006 : LCK_MTX_ASSERT_NOTOWNED);
1007 #endif /* DEBUG */
1008
1009 if (insert_in_hash == TRUE) {
1010 #if DEBUG
1011 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1012 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1013 "already in (obj=%p,off=0x%llx)",
1014 mem, object, offset, mem->object, mem->offset);
1015 #endif
1016 assert(!object->internal || offset < object->vo_size);
1017
1018 /* only insert "pageout" pages into "pageout" objects,
1019 * and normal pages into normal objects */
1020 assert(object->pageout == mem->pageout);
1021
1022 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1023
1024 /*
1025 * Record the object/offset pair in this page
1026 */
1027
1028 mem->object = object;
1029 mem->offset = offset;
1030
1031 /*
1032 * Insert it into the object_object/offset hash table
1033 */
1034 hash_id = vm_page_hash(object, offset);
1035 bucket = &vm_page_buckets[hash_id];
1036 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1037
1038 lck_spin_lock(bucket_lock);
1039
1040 mem->next = bucket->pages;
1041 bucket->pages = mem;
1042 #if MACH_PAGE_HASH_STATS
1043 if (++bucket->cur_count > bucket->hi_count)
1044 bucket->hi_count = bucket->cur_count;
1045 #endif /* MACH_PAGE_HASH_STATS */
1046
1047 lck_spin_unlock(bucket_lock);
1048 }
1049
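/*
 * If the object specifies non-default WIMG caching, push those cache
 * attributes into the pmap for this page now and remember that we did.
 */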
1050 { unsigned int cache_attr;
1051
1052 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1053
1054 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1055 pmap_set_cache_attributes(mem->phys_page, cache_attr);
1056 object->set_cache_attr = TRUE;
1057 }
1058 }
1059 /*
1060 * Now link into the object's list of backed pages.
1061 */
1062
1063 VM_PAGE_INSERT(mem, object);
1064 mem->tabled = TRUE;
1065
1066 /*
1067 * Show that the object has one more resident page.
1068 */
1069
1070 object->resident_page_count++;
1071 if (VM_PAGE_WIRED(mem)) {
1072 object->wired_page_count++;
1073 }
1074 assert(object->resident_page_count >= object->wired_page_count);
1075
1076 assert(!mem->reusable);
1077
1078 if (object->purgable == VM_PURGABLE_VOLATILE) {
1079 if (VM_PAGE_WIRED(mem)) {
1080 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1081 } else {
1082 OSAddAtomic(1, &vm_page_purgeable_count);
1083 }
1084 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1085 mem->throttled) {
1086 /*
1087 * This page belongs to a purged VM object but hasn't
1088 * been purged (because it was "busy").
1089 * It's in the "throttled" queue and hence not
1090 * visible to vm_pageout_scan(). Move it to a pageable
1091 * queue, so that it can eventually be reclaimed, instead
1092 * of lingering in the "empty" object.
1093 */
1094 if (queues_lock_held == FALSE)
1095 vm_page_lockspin_queues();
1096 vm_page_deactivate(mem);
1097 if (queues_lock_held == FALSE)
1098 vm_page_unlock_queues();
1099 }
1100 }
1101
1102 /*
1103 * vm_page_replace:
1104 *
1105 * Exactly like vm_page_insert, except that we first
1106 * remove any existing page at the given offset in object.
1107 *
1108 * The object must be locked.
1109 */
1110 void
1111 vm_page_replace(
1112 register vm_page_t mem,
1113 register vm_object_t object,
1114 register vm_object_offset_t offset)
1115 {
1116 vm_page_bucket_t *bucket;
1117 vm_page_t found_m = VM_PAGE_NULL;
1118 lck_spin_t *bucket_lock;
1119 int hash_id;
1120
1121 VM_PAGE_CHECK(mem);
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1125 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1126 "already in (obj=%p,off=0x%llx)",
1127 mem, object, offset, mem->object, mem->offset);
1128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1129 #endif
1130 /*
1131 * Record the object/offset pair in this page
1132 */
1133
1134 mem->object = object;
1135 mem->offset = offset;
1136
1137 /*
1138 * Insert it into the object_object/offset hash table,
1139 * replacing any page that might have been there.
1140 */
1141
1142 hash_id = vm_page_hash(object, offset);
1143 bucket = &vm_page_buckets[hash_id];
1144 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1145
1146 lck_spin_lock(bucket_lock);
1147
1148 if (bucket->pages) {
1149 vm_page_t *mp = &bucket->pages;
1150 vm_page_t m = *mp;
1151
1152 do {
1153 if (m->object == object && m->offset == offset) {
1154 /*
1155 * Remove old page from hash list
1156 */
1157 *mp = m->next;
1158
1159 found_m = m;
1160 break;
1161 }
1162 mp = &m->next;
1163 } while ((m = *mp));
1164
1165 mem->next = bucket->pages;
1166 } else {
1167 mem->next = VM_PAGE_NULL;
1168 }
1169 /*
1170 * insert new page at head of hash list
1171 */
1172 bucket->pages = mem;
1173
1174 lck_spin_unlock(bucket_lock);
1175
1176 if (found_m) {
1177 /*
1178 * there was already a page at the specified
1179 * offset for this object... remove it from
1180 * the object and free it back to the free list
1181 */
1182 vm_page_free_unlocked(found_m, FALSE);
1183 }
1184 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1185 }
1186
1187 /*
1188 * vm_page_remove: [ internal use only ]
1189 *
1190 * Removes the given mem entry from the object/offset-page
1191 * table and the object page list.
1192 *
1193 * The object must be locked.
1194 */
1195
1196 void
1197 vm_page_remove(
1198 vm_page_t mem,
1199 boolean_t remove_from_hash)
1200 {
1201 vm_page_bucket_t *bucket;
1202 vm_page_t this;
1203 lck_spin_t *bucket_lock;
1204 int hash_id;
1205
1206 XPR(XPR_VM_PAGE,
1207 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1208 mem->object, mem->offset,
1209 mem, 0,0);
1210
1211 vm_object_lock_assert_exclusive(mem->object);
1212 assert(mem->tabled);
1213 assert(!mem->cleaning);
1214 VM_PAGE_CHECK(mem);
1215
1216 if (remove_from_hash == TRUE) {
1217 /*
1218 * Remove from the object_object/offset hash table
1219 */
1220 hash_id = vm_page_hash(mem->object, mem->offset);
1221 bucket = &vm_page_buckets[hash_id];
1222 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1223
1224 lck_spin_lock(bucket_lock);
1225
1226 if ((this = bucket->pages) == mem) {
1227 /* optimize for common case */
1228
1229 bucket->pages = mem->next;
1230 } else {
1231 vm_page_t *prev;
1232
1233 for (prev = &this->next;
1234 (this = *prev) != mem;
1235 prev = &this->next)
1236 continue;
1237 *prev = this->next;
1238 }
1239 #if MACH_PAGE_HASH_STATS
1240 bucket->cur_count--;
1241 #endif /* MACH_PAGE_HASH_STATS */
1242
1243 lck_spin_unlock(bucket_lock);
1244 }
1245 /*
1246 * Now remove from the object's list of backed pages.
1247 */
1248
1249 VM_PAGE_REMOVE(mem);
1250
1251 /*
1252 * And show that the object has one fewer resident
1253 * page.
1254 */
1255
1256 assert(mem->object->resident_page_count > 0);
1257 mem->object->resident_page_count--;
1258
1259 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1260 if (mem->object->resident_page_count == 0)
1261 vm_object_cache_remove(mem->object);
1262 }
1263
1264 if (VM_PAGE_WIRED(mem)) {
1265 assert(mem->object->wired_page_count > 0);
1266 mem->object->wired_page_count--;
1267 }
1268 assert(mem->object->resident_page_count >=
1269 mem->object->wired_page_count);
1270 if (mem->reusable) {
1271 assert(mem->object->reusable_page_count > 0);
1272 mem->object->reusable_page_count--;
1273 assert(mem->object->reusable_page_count <=
1274 mem->object->resident_page_count);
1275 mem->reusable = FALSE;
1276 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1277 vm_page_stats_reusable.reused_remove++;
1278 } else if (mem->object->all_reusable) {
1279 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1280 vm_page_stats_reusable.reused_remove++;
1281 }
1282
1283 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1284 if (VM_PAGE_WIRED(mem)) {
1285 assert(vm_page_purgeable_wired_count > 0);
1286 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1287 } else {
1288 assert(vm_page_purgeable_count > 0);
1289 OSAddAtomic(-1, &vm_page_purgeable_count);
1290 }
1291 }
1292 if (mem->object->set_cache_attr == TRUE)
1293 pmap_set_cache_attributes(mem->phys_page, 0);
1294
1295 mem->tabled = FALSE;
1296 mem->object = VM_OBJECT_NULL;
1297 mem->offset = (vm_object_offset_t) -1;
1298 }
1299
1300
1301 /*
1302 * vm_page_lookup:
1303 *
1304 * Returns the page associated with the object/offset
1305 * pair specified; if none is found, VM_PAGE_NULL is returned.
1306 *
1307 * The object must be locked. No side effects.
1308 */
1309
1310 unsigned long vm_page_lookup_hint = 0;
1311 unsigned long vm_page_lookup_hint_next = 0;
1312 unsigned long vm_page_lookup_hint_prev = 0;
1313 unsigned long vm_page_lookup_hint_miss = 0;
1314 unsigned long vm_page_lookup_bucket_NULL = 0;
1315 unsigned long vm_page_lookup_miss = 0;
1316
1317
1318 vm_page_t
1319 vm_page_lookup(
1320 vm_object_t object,
1321 vm_object_offset_t offset)
1322 {
1323 vm_page_t mem;
1324 vm_page_bucket_t *bucket;
1325 queue_entry_t qe;
1326 lck_spin_t *bucket_lock;
1327 int hash_id;
1328
1329 vm_object_lock_assert_held(object);
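/*
 * Fast path: try the object's hint page and its immediate neighbors
 * on the memq before falling back to the hash table.
 */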
1330 mem = object->memq_hint;
1331
1332 if (mem != VM_PAGE_NULL) {
1333 assert(mem->object == object);
1334
1335 if (mem->offset == offset) {
1336 vm_page_lookup_hint++;
1337 return mem;
1338 }
1339 qe = queue_next(&mem->listq);
1340
1341 if (! queue_end(&object->memq, qe)) {
1342 vm_page_t next_page;
1343
1344 next_page = (vm_page_t) qe;
1345 assert(next_page->object == object);
1346
1347 if (next_page->offset == offset) {
1348 vm_page_lookup_hint_next++;
1349 object->memq_hint = next_page; /* new hint */
1350 return next_page;
1351 }
1352 }
1353 qe = queue_prev(&mem->listq);
1354
1355 if (! queue_end(&object->memq, qe)) {
1356 vm_page_t prev_page;
1357
1358 prev_page = (vm_page_t) qe;
1359 assert(prev_page->object == object);
1360
1361 if (prev_page->offset == offset) {
1362 vm_page_lookup_hint_prev++;
1363 object->memq_hint = prev_page; /* new hint */
1364 return prev_page;
1365 }
1366 }
1367 }
1368 /*
1369 * Search the hash table for this object/offset pair
1370 */
1371 hash_id = vm_page_hash(object, offset);
1372 bucket = &vm_page_buckets[hash_id];
1373
1374 /*
1375 * since we hold the object lock, we are guaranteed that no
1376 * new pages can be inserted into this object... this in turn
1377 * guarantees that the page we're looking for can't exist
1378 * if the bucket it hashes to is currently NULL even when looked
1379 * at outside the scope of the hash bucket lock... this is a
1380 * really cheap optimization to avoid taking the lock
1381 */
1382 if (bucket->pages == VM_PAGE_NULL) {
1383 vm_page_lookup_bucket_NULL++;
1384
1385 return (VM_PAGE_NULL);
1386 }
1387 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1388
1389 lck_spin_lock(bucket_lock);
1390
1391 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1392 VM_PAGE_CHECK(mem);
1393 if ((mem->object == object) && (mem->offset == offset))
1394 break;
1395 }
1396 lck_spin_unlock(bucket_lock);
1397
1398 if (mem != VM_PAGE_NULL) {
1399 if (object->memq_hint != VM_PAGE_NULL) {
1400 vm_page_lookup_hint_miss++;
1401 }
1402 assert(mem->object == object);
1403 object->memq_hint = mem;
1404 } else
1405 vm_page_lookup_miss++;
1406
1407 return(mem);
1408 }
1409
1410
1411 /*
1412 * vm_page_rename:
1413 *
1414 * Move the given memory entry from its
1415 * current object to the specified target object/offset.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_rename(
1421 register vm_page_t mem,
1422 register vm_object_t new_object,
1423 vm_object_offset_t new_offset,
1424 boolean_t encrypted_ok)
1425 {
1426 assert(mem->object != new_object);
1427
1428 /*
1429 * ENCRYPTED SWAP:
1430 * The encryption key is based on the page's memory object
1431 * (aka "pager") and paging offset. Moving the page to
1432 * another VM object changes its "pager" and "paging_offset"
1433 * so it has to be decrypted first, or we would lose the key.
1434 *
1435 * One exception is VM object collapsing, where we transfer pages
1436 * from one backing object to its parent object. This operation also
1437 * transfers the paging information, so the <pager,paging_offset> info
1438 * should remain consistent. The caller (vm_object_do_collapse())
1439 * sets "encrypted_ok" in this case.
1440 */
1441 if (!encrypted_ok && mem->encrypted) {
1442 panic("vm_page_rename: page %p is encrypted\n", mem);
1443 }
1444
1445 XPR(XPR_VM_PAGE,
1446 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1447 new_object, new_offset,
1448 mem, 0,0);
1449
1450 /*
1451 * Changes to mem->object require the page lock because
1452 * the pageout daemon uses that lock to get the object.
1453 */
1454 vm_page_lockspin_queues();
1455
1456 vm_page_remove(mem, TRUE);
1457 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1458
1459 vm_page_unlock_queues();
1460 }
1461
1462 /*
1463 * vm_page_init:
1464 *
1465 * Initialize the fields in a new page.
1466 * This takes a structure with random values and initializes it
1467 * so that it can be given to vm_page_release or vm_page_insert.
1468 */
1469 void
1470 vm_page_init(
1471 vm_page_t mem,
1472 ppnum_t phys_page,
1473 boolean_t lopage)
1474 {
1475 assert(phys_page);
1476 *mem = vm_page_template;
1477 mem->phys_page = phys_page;
1478 #if 0
1479 /*
1480 * we're leaving this turned off for now... currently pages
1481 * come off the free list and are either immediately dirtied/referenced
1482 * due to zero-fill or COW faults, or are used to read or write files...
1483 * in the file I/O case, the UPL mechanism takes care of clearing
1484 * the state of the HW ref/mod bits in a somewhat fragile way.
1485 * Since we may change the way this works in the future (to toughen it up),
1486 * I'm leaving this as a reminder of where these bits could get cleared
1487 */
1488
1489 /*
1490 * make sure both the h/w referenced and modified bits are
1491 * clear at this point... we are especially dependent on
1492 * not finding a 'stale' h/w modified in a number of spots
1493 * once this page goes back into use
1494 */
1495 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1496 #endif
1497 mem->lopage = lopage;
1498 }
1499
1500 /*
1501 * vm_page_grab_fictitious:
1502 *
1503 * Remove a fictitious page from the free list.
1504 * Returns VM_PAGE_NULL if there are no free pages.
1505 */
1506 int c_vm_page_grab_fictitious = 0;
1507 int c_vm_page_grab_fictitious_failed = 0;
1508 int c_vm_page_release_fictitious = 0;
1509 int c_vm_page_more_fictitious = 0;
1510
1511 vm_page_t
1512 vm_page_grab_fictitious_common(
1513 ppnum_t phys_addr)
1514 {
1515 vm_page_t m;
1516
1517 if ((m = (vm_page_t)zget(vm_page_zone))) {
1518
1519 vm_page_init(m, phys_addr, FALSE);
1520 m->fictitious = TRUE;
1521
1522 c_vm_page_grab_fictitious++;
1523 } else
1524 c_vm_page_grab_fictitious_failed++;
1525
1526 return m;
1527 }
1528
1529 vm_page_t
1530 vm_page_grab_fictitious(void)
1531 {
1532 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1533 }
1534
1535 vm_page_t
1536 vm_page_grab_guard(void)
1537 {
1538 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1539 }
1540
1541
1542 /*
1543 * vm_page_release_fictitious:
1544 *
1545 * Release a fictitious page to the zone pool
1546 */
1547 void
1548 vm_page_release_fictitious(
1549 vm_page_t m)
1550 {
1551 assert(!m->free);
1552 assert(m->fictitious);
1553 assert(m->phys_page == vm_page_fictitious_addr ||
1554 m->phys_page == vm_page_guard_addr);
1555
1556 c_vm_page_release_fictitious++;
1557
1558 zfree(vm_page_zone, m);
1559 }
1560
1561 /*
1562 * vm_page_more_fictitious:
1563 *
1564 * Add more fictitious pages to the zone.
1565 * Allowed to block. This routine is way intimate
1566 * with the zones code, for several reasons:
1567 * 1. we need to carve some page structures out of physical
1568 * memory before zones work, so they _cannot_ come from
1569 * the zone_map.
1570 * 2. the zone needs to be collectable in order to prevent
1571 * growth without bound. These structures are used by
1572 * the device pager (by the hundreds and thousands), as
1573 * private pages for pageout, and as blocking pages for
1574 * pagein. Temporary bursts in demand should not result in
1575 * permanent allocation of a resource.
1576 * 3. To smooth allocation humps, we allocate single pages
1577 * with kernel_memory_allocate(), and cram them into the
1578 * zone.
1579 */
1580
1581 void vm_page_more_fictitious(void)
1582 {
1583 vm_offset_t addr;
1584 kern_return_t retval;
1585
1586 c_vm_page_more_fictitious++;
1587
1588 /*
1589 * Allocate a single page from the zone_map. Do not wait if no physical
1590 * pages are immediately available, and do not zero the space. We need
1591 * our own blocking lock here to prevent having multiple,
1592 * simultaneous requests from piling up on the zone_map lock. Exactly
1593 * one (of our) threads should be potentially waiting on the map lock.
1594 * If winner is not vm-privileged, then the page allocation will fail,
1595 * and it will temporarily block here in the vm_page_wait().
1596 */
1597 lck_mtx_lock(&vm_page_alloc_lock);
1598 /*
1599 * If another thread allocated space, just bail out now.
1600 */
1601 if (zone_free_count(vm_page_zone) > 5) {
1602 /*
1603 * The number "5" is a small number that is larger than the
1604 * number of fictitious pages that any single caller will
1605 * attempt to allocate. Otherwise, a thread will attempt to
1606 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1607 * release all of the resources and locks already acquired,
1608 * and then call this routine. This routine finds the pages
1609 * that the caller released, and so fails to allocate new space.
1610 * The process repeats infinitely. The largest known number
1611 * of fictitious pages required in this manner is 2. 5 is
1612 * simply a somewhat larger number.
1613 */
1614 lck_mtx_unlock(&vm_page_alloc_lock);
1615 return;
1616 }
1617
1618 retval = kernel_memory_allocate(zone_map,
1619 &addr, PAGE_SIZE, VM_PROT_ALL,
1620 KMA_KOBJECT|KMA_NOPAGEWAIT);
1621 if (retval != KERN_SUCCESS) {
1622 /*
1623 * No page was available. Drop the
1624 * lock to give another thread a chance at it, and
1625 * wait for the pageout daemon to make progress.
1626 */
1627 lck_mtx_unlock(&vm_page_alloc_lock);
1628 vm_page_wait(THREAD_UNINT);
1629 return;
1630 }
1631 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1632
1633 lck_mtx_unlock(&vm_page_alloc_lock);
1634 }
1635
1636
1637 /*
1638 * vm_pool_low():
1639 *
1640 * Return true if it is not likely that a non-vm_privileged thread
1641 * can get memory without blocking. Advisory only, since the
1642 * situation may change under us.
1643 */
1644 int
1645 vm_pool_low(void)
1646 {
1647 /* No locking, at worst we will fib. */
1648 return( vm_page_free_count <= vm_page_free_reserved );
1649 }
1650
1651
1652
1653 /*
1654 * this is an interface to support bring-up of drivers
1655 * on platforms with physical memory > 4G...
1656 */
1657 int vm_himemory_mode = 0;
1658
1659
1660 /*
1661 * this interface exists to support hardware controllers
1662 * incapable of generating DMAs with more than 32 bits
1663 * of address on platforms with physical memory > 4G...
1664 */
1665 unsigned int vm_lopages_allocated_q = 0;
1666 unsigned int vm_lopages_allocated_cpm_success = 0;
1667 unsigned int vm_lopages_allocated_cpm_failed = 0;
1668 queue_head_t vm_lopage_queue_free;
1669
1670 vm_page_t
1671 vm_page_grablo(void)
1672 {
1673 vm_page_t mem;
1674
1675 if (vm_lopage_needed == FALSE)
1676 return (vm_page_grab());
1677
1678 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1679
1680 if ( !queue_empty(&vm_lopage_queue_free)) {
1681 queue_remove_first(&vm_lopage_queue_free,
1682 mem,
1683 vm_page_t,
1684 pageq);
1685 assert(vm_lopage_free_count);
1686
1687 vm_lopage_free_count--;
1688 vm_lopages_allocated_q++;
1689
1690 if (vm_lopage_free_count < vm_lopage_lowater)
1691 vm_lopage_refill = TRUE;
1692
1693 lck_mtx_unlock(&vm_page_queue_free_lock);
1694 } else {
1695 lck_mtx_unlock(&vm_page_queue_free_lock);
1696
1697 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1698
1699 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1700 vm_lopages_allocated_cpm_failed++;
1701 lck_mtx_unlock(&vm_page_queue_free_lock);
1702
1703 return (VM_PAGE_NULL);
1704 }
1705 mem->busy = TRUE;
1706
1707 vm_page_lockspin_queues();
1708
1709 mem->gobbled = FALSE;
1710 vm_page_gobble_count--;
1711 vm_page_wire_count--;
1712
1713 vm_lopages_allocated_cpm_success++;
1714 vm_page_unlock_queues();
1715 }
1716 assert(mem->busy);
1717 assert(!mem->free);
1718 assert(!mem->pmapped);
1719 assert(!mem->wpmapped);
1720
1721 mem->pageq.next = NULL;
1722 mem->pageq.prev = NULL;
1723
1724 return (mem);
1725 }
1726
1727
1728 /*
1729 * vm_page_grab:
1730 *
1731 * first try to grab a page from the per-cpu free list...
1732 * this must be done while pre-emption is disabled... if
1733 * a page is available, we're done...
1734 * if no page is available, grab the vm_page_queue_free_lock
1735 * and see if current number of free pages would allow us
1736 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1737 * if there are pages available, disable preemption and
1738 * recheck the state of the per-cpu free list... we could
1739 * have been preempted and moved to a different cpu, or
1740 * some other thread could have re-filled it... if still
1741 * empty, figure out how many pages we can steal from the
1742 * global free queue and move to the per-cpu queue...
1743 * return 1 of these pages when done... only wakeup the
1744 * pageout_scan thread if we moved pages from the global
1745 * list... no need for the wakeup if we've satisfied the
1746 * request from the per-cpu queue.
1747 */
1748
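/*
 * When vm_page_grab refills the per-cpu free list, it steals up to
 * COLOR_GROUPS_TO_STEAL pages of each color from the global free
 * queues (fewer if that would dip into the reserved pool).
 */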
1749 #define COLOR_GROUPS_TO_STEAL 4
1750
1751
1752 vm_page_t
1753 vm_page_grab( void )
1754 {
1755 vm_page_t mem;
1756
1757
1758 disable_preemption();
1759
1760 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1761 return_page_from_cpu_list:
1762 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1763 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1764 mem->pageq.next = NULL;
1765
1766 enable_preemption();
1767
1768 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1769 assert(mem->tabled == FALSE);
1770 assert(mem->object == VM_OBJECT_NULL);
1771 assert(!mem->laundry);
1772 assert(!mem->free);
1773 assert(pmap_verify_free(mem->phys_page));
1774 assert(mem->busy);
1775 assert(!mem->encrypted);
1776 assert(!mem->pmapped);
1777 assert(!mem->wpmapped);
1778 assert(!mem->active);
1779 assert(!mem->inactive);
1780 assert(!mem->throttled);
1781 assert(!mem->speculative);
1782
1783 return mem;
1784 }
1785 enable_preemption();
1786
1787
1788 /*
1789 * Optionally produce warnings if the wire or gobble
1790 * counts exceed some threshold.
1791 */
1792 if (vm_page_wire_count_warning > 0
1793 && vm_page_wire_count >= vm_page_wire_count_warning) {
1794 printf("mk: vm_page_grab(): high wired page count of %d\n",
1795 vm_page_wire_count);
1796 assert(vm_page_wire_count < vm_page_wire_count_warning);
1797 }
1798 if (vm_page_gobble_count_warning > 0
1799 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1800 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1801 vm_page_gobble_count);
1802 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1803 }
1804
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1806
1807 /*
1808 * Only let privileged threads (involved in pageout)
1809 * dip into the reserved pool.
1810 */
1811 if ((vm_page_free_count < vm_page_free_reserved) &&
1812 !(current_thread()->options & TH_OPT_VMPRIV)) {
1813 lck_mtx_unlock(&vm_page_queue_free_lock);
1814 mem = VM_PAGE_NULL;
1815 }
1816 else {
1817 vm_page_t head;
1818 vm_page_t tail;
1819 unsigned int pages_to_steal;
1820 unsigned int color;
1821
1822 while ( vm_page_free_count == 0 ) {
1823
1824 lck_mtx_unlock(&vm_page_queue_free_lock);
1825 /*
1826 * must be a privileged thread to be
1827 * in this state since a non-privileged
1828 * thread would have bailed if we were
1829 * under the vm_page_free_reserved mark
1830 */
1831 VM_PAGE_WAIT();
1832 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1833 }
1834
1835 disable_preemption();
1836
1837 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1838 lck_mtx_unlock(&vm_page_queue_free_lock);
1839
1840 /*
1841 * we got preempted and moved to another processor
1842 * or we got preempted and someone else ran and filled the cache
1843 */
1844 goto return_page_from_cpu_list;
1845 }
1846 if (vm_page_free_count <= vm_page_free_reserved)
1847 pages_to_steal = 1;
1848 else {
1849 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1850
1851 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1852 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1853 }
1854 color = PROCESSOR_DATA(current_processor(), start_color);
1855 head = tail = NULL;
1856
1857 while (pages_to_steal--) {
1858 if (--vm_page_free_count < vm_page_free_count_minimum)
1859 vm_page_free_count_minimum = vm_page_free_count;
1860
1861 while (queue_empty(&vm_page_queue_free[color]))
1862 color = (color + 1) & vm_color_mask;
1863
1864 queue_remove_first(&vm_page_queue_free[color],
1865 mem,
1866 vm_page_t,
1867 pageq);
1868 mem->pageq.next = NULL;
1869 mem->pageq.prev = NULL;
1870
1871 assert(!mem->active);
1872 assert(!mem->inactive);
1873 assert(!mem->throttled);
1874 assert(!mem->speculative);
1875
1876 color = (color + 1) & vm_color_mask;
1877
1878 if (head == NULL)
1879 head = mem;
1880 else
1881 tail->pageq.next = (queue_t)mem;
1882 tail = mem;
1883
1884 mem->pageq.prev = NULL;
1885 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1886 assert(mem->tabled == FALSE);
1887 assert(mem->object == VM_OBJECT_NULL);
1888 assert(!mem->laundry);
1889 assert(mem->free);
1890 mem->free = FALSE;
1891
1892 assert(pmap_verify_free(mem->phys_page));
1893 assert(mem->busy);
1894 assert(!mem->free);
1895 assert(!mem->encrypted);
1896 assert(!mem->pmapped);
1897 assert(!mem->wpmapped);
1898 }
1899 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1900 PROCESSOR_DATA(current_processor(), start_color) = color;
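/*
 * Note: the head of the batch satisfies this request just below; the
 * remainder of the chain, parked in the per-processor free_pages cache
 * above, is handed out by later grabs via return_page_from_cpu_list.
 */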
1901
1902 /*
1903 * satisfy this request
1904 */
1905 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1906 mem = head;
1907 mem->pageq.next = NULL;
1908
1909 lck_mtx_unlock(&vm_page_queue_free_lock);
1910
1911 enable_preemption();
1912 }
1913 /*
1914 * Decide if we should poke the pageout daemon.
1915 * We do this if the free count is less than the low
1916 * water mark, or if the free count is less than the high
1917 * water mark (but above the low water mark) and the inactive
1918 * count is less than its target.
1919 *
1920 * We don't have the counts locked ... if they change a little,
1921 * it doesn't really matter.
1922 */
1923 if ((vm_page_free_count < vm_page_free_min) ||
1924 ((vm_page_free_count < vm_page_free_target) &&
1925 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1926 thread_wakeup((event_t) &vm_page_free_wanted);
1927
1928 VM_CHECK_MEMORYSTATUS;
1929
1930 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1931
1932 return mem;
1933 }
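/*
 * Illustrative caller sketch (a hypothetical fragment, mirroring the
 * fallback path in vm_page_part_zero_fill() later in this file): the
 * page returned by vm_page_grab() is busy, unqueued and not yet in any
 * object, and callers that can block typically retry like this:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *		vm_page_wait(THREAD_UNINT);
 */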
1934
1935 /*
1936 * vm_page_release:
1937 *
1938 * Return a page to the free list.
1939 */
1940
1941 void
1942 vm_page_release(
1943 register vm_page_t mem)
1944 {
1945 unsigned int color;
1946 int need_wakeup = 0;
1947 int need_priv_wakeup = 0;
1948
1949
1950 assert(!mem->private && !mem->fictitious);
1951 if (vm_page_free_verify) {
1952 assert(pmap_verify_free(mem->phys_page));
1953 }
1954 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1955
1956
1957 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1958 #if DEBUG
1959 if (mem->free)
1960 panic("vm_page_release");
1961 #endif
1962
1963 assert(mem->busy);
1964 assert(!mem->laundry);
1965 assert(mem->object == VM_OBJECT_NULL);
1966 assert(mem->pageq.next == NULL &&
1967 mem->pageq.prev == NULL);
1968 assert(mem->listq.next == NULL &&
1969 mem->listq.prev == NULL);
1970
1971 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
1972 vm_lopage_free_count < vm_lopage_free_limit &&
1973 mem->phys_page < max_valid_low_ppnum) {
1974 /*
1975 * this exists to support hardware controllers
1976 * incapable of generating DMAs with more than 32 bits
1977 * of address on platforms with physical memory > 4G...
1978 */
1979 queue_enter_first(&vm_lopage_queue_free,
1980 mem,
1981 vm_page_t,
1982 pageq);
1983 vm_lopage_free_count++;
1984
1985 if (vm_lopage_free_count >= vm_lopage_free_limit)
1986 vm_lopage_refill = FALSE;
1987
1988 mem->lopage = TRUE;
1989 } else {
1990 mem->lopage = FALSE;
1991 mem->free = TRUE;
1992
1993 color = mem->phys_page & vm_color_mask;
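/*
 * vm_color_mask is vm_colors - 1 (vm_colors is a power of two), so the
 * bucket is just the low bits of the physical page number; e.g., with
 * 32 colors, phys_page 0x12345 lands in bucket 0x12345 & 0x1f == 0x05.
 */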
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
1998 vm_page_free_count++;
1999 /*
2000 * Check if we should wake up someone waiting for a page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wakeup only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
2005 * if we wakeup all waiting threads, one greedy thread
2006 * can starve multiple niceguy threads. When the threads
2007 * all wake up, the greedy thread runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
2018
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2021 vm_page_free_wanted_privileged--;
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
2025 vm_page_free_wanted--;
2026 need_wakeup = 1;
2027 }
2028 }
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2035
2036 VM_CHECK_MEMORYSTATUS;
2037 }
2038
2039 /*
2040 * vm_page_wait:
2041 *
2042 * Wait for a page to become available.
2043 * If there are plenty of free pages, then we don't sleep.
2044 *
2045 * Returns:
2046 * TRUE: There may be another page, try again
2047 * FALSE: We were interrupted out of our wait, don't try again
2048 */
2049
2050 boolean_t
2051 vm_page_wait(
2052 int interruptible )
2053 {
2054 /*
2055 * We can't use vm_page_free_reserved to make this
2056 * determination. Consider: some thread might
2057 * need to allocate two pages. The first allocation
2058 * succeeds, the second fails. After the first page is freed,
2059 * a call to vm_page_wait must really block.
2060 */
2061 kern_return_t wait_result;
2062 int need_wakeup = 0;
2063 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2064
2065 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2066
2067 if (is_privileged && vm_page_free_count) {
2068 lck_mtx_unlock(&vm_page_queue_free_lock);
2069 return TRUE;
2070 }
2071 if (vm_page_free_count < vm_page_free_target) {
2072
2073 if (is_privileged) {
2074 if (vm_page_free_wanted_privileged++ == 0)
2075 need_wakeup = 1;
2076 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2077 } else {
2078 if (vm_page_free_wanted++ == 0)
2079 need_wakeup = 1;
2080 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2081 }
2082 lck_mtx_unlock(&vm_page_queue_free_lock);
2083 counter(c_vm_page_wait_block++);
2084
2085 if (need_wakeup)
2086 thread_wakeup((event_t)&vm_page_free_wanted);
2087
2088 if (wait_result == THREAD_WAITING)
2089 wait_result = thread_block(THREAD_CONTINUE_NULL);
2090
2091 return(wait_result == THREAD_AWAKENED);
2092 } else {
2093 lck_mtx_unlock(&vm_page_queue_free_lock);
2094 return TRUE;
2095 }
2096 }
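/*
 * Illustrative (hypothetical) caller sketch; the error returned on an
 * interrupted wait is caller-specific, KERN_ABORTED is only an example:
 *
 *	if (vm_page_wait(interruptible) == FALSE)
 *		return KERN_ABORTED;
 *	... otherwise retry the allocation ...
 */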
2097
2098 /*
2099 * vm_page_alloc:
2100 *
2101 * Allocate and return a memory cell associated
2102 * with this VM object/offset pair.
2103 *
2104 * Object must be locked.
2105 */
2106
2107 vm_page_t
2108 vm_page_alloc(
2109 vm_object_t object,
2110 vm_object_offset_t offset)
2111 {
2112 register vm_page_t mem;
2113
2114 vm_object_lock_assert_exclusive(object);
2115 mem = vm_page_grab();
2116 if (mem == VM_PAGE_NULL)
2117 return VM_PAGE_NULL;
2118
2119 vm_page_insert(mem, object, offset);
2120
2121 return(mem);
2122 }
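/*
 * Illustrative sketch of a blocking allocation against a locked object
 * (a hypothetical fragment; "object" and "offset" stand for the
 * caller's own values). The exclusive object lock must be held across
 * the call and dropped before sleeping:
 *
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 */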
2123
2124 vm_page_t
2125 vm_page_alloclo(
2126 vm_object_t object,
2127 vm_object_offset_t offset)
2128 {
2129 register vm_page_t mem;
2130
2131 vm_object_lock_assert_exclusive(object);
2132 mem = vm_page_grablo();
2133 if (mem == VM_PAGE_NULL)
2134 return VM_PAGE_NULL;
2135
2136 vm_page_insert(mem, object, offset);
2137
2138 return(mem);
2139 }
2140
2141
2142 /*
2143 * vm_page_alloc_guard:
2144 *
2145 * Allocate a fictitious page which will be used
2146 * as a guard page. The page will be inserted into
2147 * the object and returned to the caller.
2148 */
2149
2150 vm_page_t
2151 vm_page_alloc_guard(
2152 vm_object_t object,
2153 vm_object_offset_t offset)
2154 {
2155 register vm_page_t mem;
2156
2157 vm_object_lock_assert_exclusive(object);
2158 mem = vm_page_grab_guard();
2159 if (mem == VM_PAGE_NULL)
2160 return VM_PAGE_NULL;
2161
2162 vm_page_insert(mem, object, offset);
2163
2164 return(mem);
2165 }
2166
2167
2168 counter(unsigned int c_laundry_pages_freed = 0;)
2169
2170 /*
2171 * vm_page_free_prepare:
2172 *
2173 * Removes page from any queue it may be on
2174 * and disassociates it from its VM object.
2175 *
2176 * Object and page queues must be locked prior to entry.
2177 */
2178 static void
2179 vm_page_free_prepare(
2180 vm_page_t mem)
2181 {
2182 vm_page_free_prepare_queues(mem);
2183 vm_page_free_prepare_object(mem, TRUE);
2184 }
2185
2186
2187 void
2188 vm_page_free_prepare_queues(
2189 vm_page_t mem)
2190 {
2191 VM_PAGE_CHECK(mem);
2192 assert(!mem->free);
2193 assert(!mem->cleaning);
2194 assert(!mem->pageout);
2195 #if DEBUG
2196 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2197 if (mem->free)
2198 panic("vm_page_free: freeing page on free list\n");
2199 #endif
2200 if (mem->object) {
2201 vm_object_lock_assert_exclusive(mem->object);
2202 }
2203
2204 if (mem->laundry) {
2205 /*
2206 * We may have to free a page while it's being laundered
2207 * if we lost its pager (due to a forced unmount, for example).
2208 * We need to call vm_pageout_throttle_up() before removing
2209 * the page from its VM object, so that we can find out on
2210 * which pageout queue the page is on.
2211 */
2212 vm_pageout_throttle_up(mem);
2213 counter(++c_laundry_pages_freed);
2214 }
2215 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2216
2217 if (VM_PAGE_WIRED(mem)) {
2218 if (mem->object) {
2219 assert(mem->object->wired_page_count > 0);
2220 mem->object->wired_page_count--;
2221 assert(mem->object->resident_page_count >=
2222 mem->object->wired_page_count);
2223
2224 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2225 OSAddAtomic(+1, &vm_page_purgeable_count);
2226 assert(vm_page_purgeable_wired_count > 0);
2227 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2228 }
2229 }
2230 if (!mem->private && !mem->fictitious)
2231 vm_page_wire_count--;
2232 mem->wire_count = 0;
2233 assert(!mem->gobbled);
2234 } else if (mem->gobbled) {
2235 if (!mem->private && !mem->fictitious)
2236 vm_page_wire_count--;
2237 vm_page_gobble_count--;
2238 }
2239 }
2240
2241
2242 void
2243 vm_page_free_prepare_object(
2244 vm_page_t mem,
2245 boolean_t remove_from_hash)
2246 {
2247 if (mem->tabled)
2248 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2249
2250 PAGE_WAKEUP(mem); /* clears wanted */
2251
2252 if (mem->private) {
2253 mem->private = FALSE;
2254 mem->fictitious = TRUE;
2255 mem->phys_page = vm_page_fictitious_addr;
2256 }
2257 if ( !mem->fictitious) {
2258 if (mem->zero_fill == TRUE)
2259 VM_ZF_COUNT_DECR();
2260 vm_page_init(mem, mem->phys_page, mem->lopage);
2261 }
2262 }
2263
2264
2265 /*
2266 * vm_page_free:
2267 *
2268 * Returns the given page to the free list,
2269 * disassociating it with any VM object.
2270 *
2271 * Object and page queues must be locked prior to entry.
2272 */
2273 void
2274 vm_page_free(
2275 vm_page_t mem)
2276 {
2277 vm_page_free_prepare(mem);
2278
2279 if (mem->fictitious) {
2280 vm_page_release_fictitious(mem);
2281 } else {
2282 vm_page_release(mem);
2283 }
2284 }
2285
2286
2287 void
2288 vm_page_free_unlocked(
2289 vm_page_t mem,
2290 boolean_t remove_from_hash)
2291 {
2292 vm_page_lockspin_queues();
2293 vm_page_free_prepare_queues(mem);
2294 vm_page_unlock_queues();
2295
2296 vm_page_free_prepare_object(mem, remove_from_hash);
2297
2298 if (mem->fictitious) {
2299 vm_page_release_fictitious(mem);
2300 } else {
2301 vm_page_release(mem);
2302 }
2303 }
2304
2305 /*
2306 * Free a list of pages. The list can be up to several hundred pages,
2307 * as blocked up by vm_pageout_scan().
2308 * The big win is not having to take the free list lock once
2309 * per page. We sort the incoming pages into n lists, one for
2310 * each color.
2311 */
2312 void
2313 vm_page_free_list(
2314 vm_page_t mem,
2315 boolean_t prepare_object)
2316 {
2317 vm_page_t nxt;
2318 int pg_count = 0;
2319 int color;
2320 int inuse_list_head = -1;
2321
2322 queue_head_t free_list[MAX_COLORS];
2323 int inuse[MAX_COLORS];
2324
2325 for (color = 0; color < (signed) vm_colors; color++) {
2326 queue_init(&free_list[color]);
2327 }
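/*
 * free_list[color] collects this batch's pages per color, while
 * inuse[] / inuse_list_head chain together only the colors that
 * actually receive pages, so the splice loop further down visits just
 * the buckets in use rather than all MAX_COLORS of them.
 */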
2328
2329 while (mem) {
2330 assert(!mem->inactive);
2331 assert(!mem->active);
2332 assert(!mem->throttled);
2333 assert(!mem->free);
2334 assert(!mem->speculative);
2335 assert(!VM_PAGE_WIRED(mem));
2336 assert(mem->pageq.prev == NULL);
2337
2338 nxt = (vm_page_t)(mem->pageq.next);
2339
2340 if (prepare_object == TRUE)
2341 vm_page_free_prepare_object(mem, TRUE);
2342
2343 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2344 assert(pmap_verify_free(mem->phys_page));
2345 }
2346
2347 if (!mem->fictitious) {
2348 assert(mem->busy);
2349 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2350 vm_lopage_free_count < vm_lopage_free_limit &&
2351 mem->phys_page < max_valid_low_ppnum) {
2352 mem->pageq.next = NULL;
2353 vm_page_release(mem);
2354 } else {
2355
2356 /*
2357 * IMPORTANT: we can't set the page "free" here
2358 * because that would make the page eligible for
2359 * a physically-contiguous allocation (see
2360 * vm_page_find_contiguous()) right away (we don't
2361 * hold the vm_page_queue_free lock). That would
2362 * cause trouble because the page is not actually
2363 * in the free queue yet...
2364 */
2365 color = mem->phys_page & vm_color_mask;
2366 if (queue_empty(&free_list[color])) {
2367 inuse[color] = inuse_list_head;
2368 inuse_list_head = color;
2369 }
2370 queue_enter_first(&free_list[color],
2371 mem,
2372 vm_page_t,
2373 pageq);
2374 pg_count++;
2375 }
2376 } else {
2377 assert(mem->phys_page == vm_page_fictitious_addr ||
2378 mem->phys_page == vm_page_guard_addr);
2379 vm_page_release_fictitious(mem);
2380 }
2381 mem = nxt;
2382 }
2383 if (pg_count) {
2384 unsigned int avail_free_count;
2385 unsigned int need_wakeup = 0;
2386 unsigned int need_priv_wakeup = 0;
2387
2388 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2389
2390 color = inuse_list_head;
2391
2392 while( color != -1 ) {
2393 vm_page_t first, last;
2394 vm_page_t first_free;
2395
2396 /*
2397 * Now that we hold the vm_page_queue_free lock,
2398 * it's safe to mark all pages in our local queue
2399 * as "free"...
2400 */
2401 queue_iterate(&free_list[color],
2402 mem,
2403 vm_page_t,
2404 pageq) {
2405 assert(!mem->free);
2406 assert(mem->busy);
2407 mem->free = TRUE;
2408 }
2409
2410 /*
2411 * ... and insert our local queue at the head of
2412 * the global free queue.
2413 */
2414 first = (vm_page_t) queue_first(&free_list[color]);
2415 last = (vm_page_t) queue_last(&free_list[color]);
2416 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2417 if (queue_empty(&vm_page_queue_free[color])) {
2418 queue_last(&vm_page_queue_free[color]) =
2419 (queue_entry_t) last;
2420 } else {
2421 queue_prev(&first_free->pageq) =
2422 (queue_entry_t) last;
2423 }
2424 queue_first(&vm_page_queue_free[color]) =
2425 (queue_entry_t) first;
2426 queue_prev(&first->pageq) =
2427 (queue_entry_t) &vm_page_queue_free[color];
2428 queue_next(&last->pageq) =
2429 (queue_entry_t) first_free;
2430
2431 /* next color */
2432 color = inuse[color];
2433 }
2434
2435 vm_page_free_count += pg_count;
2436 avail_free_count = vm_page_free_count;
2437
2438 if (vm_page_free_wanted_privileged > 0 &&
2439 avail_free_count > 0) {
2440 if (avail_free_count < vm_page_free_wanted_privileged) {
2441 need_priv_wakeup = avail_free_count;
2442 vm_page_free_wanted_privileged -=
2443 avail_free_count;
2444 avail_free_count = 0;
2445 } else {
2446 need_priv_wakeup = vm_page_free_wanted_privileged;
2447 vm_page_free_wanted_privileged = 0;
2448 avail_free_count -=
2449 need_priv_wakeup;
2450 }
2451 }
2452
2453 if (vm_page_free_wanted > 0 &&
2454 avail_free_count > vm_page_free_reserved) {
2455 unsigned int available_pages;
2456
2457 available_pages = (avail_free_count -
2458 vm_page_free_reserved);
2459
2460 if (available_pages >= vm_page_free_wanted) {
2461 need_wakeup = vm_page_free_wanted;
2462 vm_page_free_wanted = 0;
2463 } else {
2464 need_wakeup = available_pages;
2465 vm_page_free_wanted -= available_pages;
2466 }
2467 }
2468 lck_mtx_unlock(&vm_page_queue_free_lock);
2469
2470 if (need_priv_wakeup != 0) {
2471 /*
2472 * There shouldn't be that many VM-privileged threads,
2473 * so let's wake them all up, even if we don't quite
2474 * have enough pages to satisfy them all.
2475 */
2476 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2477 }
2478 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2479 /*
2480 * We don't expect to have any more waiters
2481 * after this, so let's wake them all up at
2482 * once.
2483 */
2484 thread_wakeup((event_t) &vm_page_free_count);
2485 } else for (; need_wakeup != 0; need_wakeup--) {
2486 /*
2487 * Wake up one waiter per page we just released.
2488 */
2489 thread_wakeup_one((event_t) &vm_page_free_count);
2490 }
2491
2492 VM_CHECK_MEMORYSTATUS;
2493 }
2494 }
2495
2496
2497 /*
2498 * vm_page_wire:
2499 *
2500 * Mark this page as wired down by yet
2501 * another map, removing it from paging queues
2502 * as necessary.
2503 *
2504 * The page's object and the page queues must be locked.
2505 */
2506 void
2507 vm_page_wire(
2508 register vm_page_t mem)
2509 {
2510
2511 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2512
2513 VM_PAGE_CHECK(mem);
2514 if (mem->object) {
2515 vm_object_lock_assert_exclusive(mem->object);
2516 } else {
2517 /*
2518 * In theory, the page should be in an object before it
2519 * gets wired, since we need to hold the object lock
2520 * to update some fields in the page structure.
2521 * However, some code (i386 pmap, for example) might want
2522 * to wire a page before it gets inserted into an object.
2523 * That's somewhat OK, as long as nobody else can get to
2524 * that page and update it at the same time.
2525 */
2526 }
2527 #if DEBUG
2528 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2529 #endif
2530 if ( !VM_PAGE_WIRED(mem)) {
2531 VM_PAGE_QUEUES_REMOVE(mem);
2532
2533 if (mem->object) {
2534 mem->object->wired_page_count++;
2535 assert(mem->object->resident_page_count >=
2536 mem->object->wired_page_count);
2537 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2538 assert(vm_page_purgeable_count > 0);
2539 OSAddAtomic(-1, &vm_page_purgeable_count);
2540 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2541 }
2542 if (mem->object->all_reusable) {
2543 /*
2544 * Wired pages are not counted as "re-usable"
2545 * in "all_reusable" VM objects, so nothing
2546 * to do here.
2547 */
2548 } else if (mem->reusable) {
2549 /*
2550 * This page is not "re-usable" when it's
2551 * wired, so adjust its state and the
2552 * accounting.
2553 */
2554 vm_object_reuse_pages(mem->object,
2555 mem->offset,
2556 mem->offset+PAGE_SIZE_64,
2557 FALSE);
2558 }
2559 }
2560 assert(!mem->reusable);
2561
2562 if (!mem->private && !mem->fictitious && !mem->gobbled)
2563 vm_page_wire_count++;
2564 if (mem->gobbled)
2565 vm_page_gobble_count--;
2566 mem->gobbled = FALSE;
2567 if (mem->zero_fill == TRUE) {
2568 mem->zero_fill = FALSE;
2569 VM_ZF_COUNT_DECR();
2570 }
2571
2572 VM_CHECK_MEMORYSTATUS;
2573
2574 /*
2575 * ENCRYPTED SWAP:
2576 * The page could be encrypted, but
2577 * we don't have to decrypt it here
2578 * because we don't guarantee that the
2579 * data is actually valid at this point.
2580 * The page will get decrypted in
2581 * vm_fault_wire() if needed.
2582 */
2583 }
2584 assert(!mem->gobbled);
2585 mem->wire_count++;
2586 VM_PAGE_CHECK(mem);
2587 }
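/*
 * Illustrative (hypothetical) wiring sequence; per the asserts above,
 * both the exclusive object lock and the page-queues lock are held:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */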
2588
2589 /*
2590 * vm_page_gobble:
2591 *
2592 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2593 *
2594 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2595 */
2596 void
2597 vm_page_gobble(
2598 register vm_page_t mem)
2599 {
2600 vm_page_lockspin_queues();
2601 VM_PAGE_CHECK(mem);
2602
2603 assert(!mem->gobbled);
2604 assert( !VM_PAGE_WIRED(mem));
2605
2606 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2607 if (!mem->private && !mem->fictitious)
2608 vm_page_wire_count++;
2609 }
2610 vm_page_gobble_count++;
2611 mem->gobbled = TRUE;
2612 vm_page_unlock_queues();
2613 }
2614
2615 /*
2616 * vm_page_unwire:
2617 *
2618 * Release one wiring of this page, potentially
2619 * enabling it to be paged again.
2620 *
2621 * The page's object and the page queues must be locked.
2622 */
2623 void
2624 vm_page_unwire(
2625 vm_page_t mem,
2626 boolean_t queueit)
2627 {
2628
2629 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2630
2631 VM_PAGE_CHECK(mem);
2632 assert(VM_PAGE_WIRED(mem));
2633 assert(mem->object != VM_OBJECT_NULL);
2634 #if DEBUG
2635 vm_object_lock_assert_exclusive(mem->object);
2636 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2637 #endif
2638 if (--mem->wire_count == 0) {
2639 assert(!mem->private && !mem->fictitious);
2640 vm_page_wire_count--;
2641 assert(mem->object->wired_page_count > 0);
2642 mem->object->wired_page_count--;
2643 assert(mem->object->resident_page_count >=
2644 mem->object->wired_page_count);
2645 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2646 OSAddAtomic(+1, &vm_page_purgeable_count);
2647 assert(vm_page_purgeable_wired_count > 0);
2648 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2649 }
2650 assert(!mem->laundry);
2651 assert(mem->object != kernel_object);
2652 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2653
2654 if (queueit == TRUE) {
2655 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2656 vm_page_deactivate(mem);
2657 } else {
2658 vm_page_activate(mem);
2659 }
2660 }
2661
2662 VM_CHECK_MEMORYSTATUS;
2663
2664 }
2665 VM_PAGE_CHECK(mem);
2666 }
2667
2668 /*
2669 * vm_page_deactivate:
2670 *
2671 * Returns the given page to the inactive list,
2672 * indicating that no physical maps have access
2673 * to this page. [Used by the physical mapping system.]
2674 *
2675 * The page queues must be locked.
2676 */
2677 void
2678 vm_page_deactivate(
2679 vm_page_t m)
2680 {
2681 vm_page_deactivate_internal(m, TRUE);
2682 }
2683
2684
2685 void
2686 vm_page_deactivate_internal(
2687 vm_page_t m,
2688 boolean_t clear_hw_reference)
2689 {
2690
2691 VM_PAGE_CHECK(m);
2692 assert(m->object != kernel_object);
2693 assert(m->phys_page != vm_page_guard_addr);
2694
2695 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2696 #if DEBUG
2697 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2698 #endif
2699 /*
2700 * This page is no longer very interesting. If it was
2701 * interesting (active or inactive/referenced), then we
2702 * clear the reference bit and (re)enter it in the
2703 * inactive queue. Note wired pages should not have
2704 * their reference bit cleared.
2705 */
2706 assert ( !(m->absent && !m->unusual));
2707
2708 if (m->gobbled) { /* can this happen? */
2709 assert( !VM_PAGE_WIRED(m));
2710
2711 if (!m->private && !m->fictitious)
2712 vm_page_wire_count--;
2713 vm_page_gobble_count--;
2714 m->gobbled = FALSE;
2715 }
2716 if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2717 return;
2718
2719 if (!m->absent && clear_hw_reference == TRUE)
2720 pmap_clear_reference(m->phys_page);
2721
2722 m->reference = FALSE;
2723 m->no_cache = FALSE;
2724
2725 if (!m->inactive) {
2726 VM_PAGE_QUEUES_REMOVE(m);
2727
2728 assert(!m->laundry);
2729 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2730
2731 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2732 m->dirty && m->object->internal &&
2733 (m->object->purgable == VM_PURGABLE_DENY ||
2734 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2735 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2736 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2737 m->throttled = TRUE;
2738 vm_page_throttled_count++;
2739 } else {
2740 if (m->object->named && m->object->ref_count == 1) {
2741 vm_page_speculate(m, FALSE);
2742 #if DEVELOPMENT || DEBUG
2743 vm_page_speculative_recreated++;
2744 #endif
2745 } else {
2746 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2747 }
2748 }
2749 }
2750 }
2751
2752 /*
2753 * vm_page_activate:
2754 *
2755 * Put the specified page on the active list (if appropriate).
2756 *
2757 * The page queues must be locked.
2758 */
2759
2760 void
2761 vm_page_activate(
2762 register vm_page_t m)
2763 {
2764 VM_PAGE_CHECK(m);
2765 #ifdef FIXME_4778297
2766 assert(m->object != kernel_object);
2767 #endif
2768 assert(m->phys_page != vm_page_guard_addr);
2769 #if DEBUG
2770 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2771 #endif
2772 assert( !(m->absent && !m->unusual));
2773
2774 if (m->gobbled) {
2775 assert( !VM_PAGE_WIRED(m));
2776 if (!m->private && !m->fictitious)
2777 vm_page_wire_count--;
2778 vm_page_gobble_count--;
2779 m->gobbled = FALSE;
2780 }
2781 if (m->private || m->fictitious)
2782 return;
2783
2784 #if DEBUG
2785 if (m->active)
2786 panic("vm_page_activate: already active");
2787 #endif
2788
2789 if (m->speculative) {
2790 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2791 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2792 }
2793
2794 VM_PAGE_QUEUES_REMOVE(m);
2795
2796 if ( !VM_PAGE_WIRED(m)) {
2797 assert(!m->laundry);
2798 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2799 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2800 m->dirty && m->object->internal &&
2801 (m->object->purgable == VM_PURGABLE_DENY ||
2802 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2803 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2804 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2805 m->throttled = TRUE;
2806 vm_page_throttled_count++;
2807 } else {
2808 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2809 m->active = TRUE;
2810 vm_page_active_count++;
2811 }
2812 m->reference = TRUE;
2813 m->no_cache = FALSE;
2814 }
2815 VM_PAGE_CHECK(m);
2816 }
2817
2818
2819 /*
2820 * vm_page_speculate:
2821 *
2822 * Put the specified page on the speculative list (if appropriate).
2823 *
2824 * The page queues must be locked.
2825 */
2826 void
2827 vm_page_speculate(
2828 vm_page_t m,
2829 boolean_t new)
2830 {
2831 struct vm_speculative_age_q *aq;
2832
2833 VM_PAGE_CHECK(m);
2834 assert(m->object != kernel_object);
2835 assert(m->phys_page != vm_page_guard_addr);
2836 #if DEBUG
2837 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2838 #endif
2839 assert( !(m->absent && !m->unusual));
2840
2841 if (m->private || m->fictitious)
2842 return;
2843
2844 VM_PAGE_QUEUES_REMOVE(m);
2845
2846 if ( !VM_PAGE_WIRED(m)) {
2847 mach_timespec_t ts;
2848 clock_sec_t sec;
2849 clock_nsec_t nsec;
2850
2851 clock_get_system_nanotime(&sec, &nsec);
2852 ts.tv_sec = (unsigned int) sec;
2853 ts.tv_nsec = nsec;
2854
2855 if (vm_page_speculative_count == 0) {
2856
2857 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2858 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2859
2860 aq = &vm_page_queue_speculative[speculative_age_index];
2861
2862 /*
2863 * set the timer to begin a new group
2864 */
2865 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2866 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
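/*
 * Illustrative arithmetic only: if vm_page_speculative_q_age_ms were
 * 1500, this would yield tv_sec = 1 and tv_nsec = 500 * 1000 *
 * NSEC_PER_USEC = 500000000, i.e. a 1.5s window, which is then
 * advanced to an absolute deadline by the ADD_MACH_TIMESPEC below.
 */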
2867
2868 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2869 } else {
2870 aq = &vm_page_queue_speculative[speculative_age_index];
2871
2872 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2873
2874 speculative_age_index++;
2875
2876 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2877 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2878 if (speculative_age_index == speculative_steal_index) {
2879 speculative_steal_index = speculative_age_index + 1;
2880
2881 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2882 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2883 }
2884 aq = &vm_page_queue_speculative[speculative_age_index];
2885
2886 if (!queue_empty(&aq->age_q))
2887 vm_page_speculate_ageit(aq);
2888
2889 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2890 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2891
2892 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2893 }
2894 }
2895 enqueue_tail(&aq->age_q, &m->pageq);
2896 m->speculative = TRUE;
2897 vm_page_speculative_count++;
2898
2899 if (new == TRUE) {
2900 vm_object_lock_assert_exclusive(m->object);
2901
2902 m->object->pages_created++;
2903 #if DEVELOPMENT || DEBUG
2904 vm_page_speculative_created++;
2905 #endif
2906 }
2907 }
2908 VM_PAGE_CHECK(m);
2909 }
2910
2911
2912 /*
2913 * move pages from the specified aging bin to
2914 * the speculative bin that pageout_scan claims from
2915 *
2916 * The page queues must be locked.
2917 */
2918 void
2919 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2920 {
2921 struct vm_speculative_age_q *sq;
2922 vm_page_t t;
2923
2924 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2925
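/*
 * If the AGED bin is empty, adopt aq's chain wholesale; otherwise
 * splice aq's chain onto the tail of sq. In both cases the boundary
 * pages' pageq links must be re-pointed at &sq->age_q by hand, since
 * the queue heads are embedded sentinels rather than vm_page_t's.
 */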
2926 if (queue_empty(&sq->age_q)) {
2927 sq->age_q.next = aq->age_q.next;
2928 sq->age_q.prev = aq->age_q.prev;
2929
2930 t = (vm_page_t)sq->age_q.next;
2931 t->pageq.prev = &sq->age_q;
2932
2933 t = (vm_page_t)sq->age_q.prev;
2934 t->pageq.next = &sq->age_q;
2935 } else {
2936 t = (vm_page_t)sq->age_q.prev;
2937 t->pageq.next = aq->age_q.next;
2938
2939 t = (vm_page_t)aq->age_q.next;
2940 t->pageq.prev = sq->age_q.prev;
2941
2942 t = (vm_page_t)aq->age_q.prev;
2943 t->pageq.next = &sq->age_q;
2944
2945 sq->age_q.prev = aq->age_q.prev;
2946 }
2947 queue_init(&aq->age_q);
2948 }
2949
2950
2951 void
2952 vm_page_lru(
2953 vm_page_t m)
2954 {
2955 VM_PAGE_CHECK(m);
2956 assert(m->object != kernel_object);
2957 assert(m->phys_page != vm_page_guard_addr);
2958
2959 #if DEBUG
2960 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2961 #endif
2962 if (m->active || m->reference)
2963 return;
2964
2965 if (m->private || (VM_PAGE_WIRED(m)))
2966 return;
2967
2968 m->no_cache = FALSE;
2969
2970 VM_PAGE_QUEUES_REMOVE(m);
2971
2972 assert(!m->laundry);
2973 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2974
2975 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2976 }
2977
2978
2979 void
2980 vm_page_reactivate_all_throttled(void)
2981 {
2982 vm_page_t first_throttled, last_throttled;
2983 vm_page_t first_active;
2984 vm_page_t m;
2985 int extra_active_count;
2986
2987 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
2988 return;
2989
2990 extra_active_count = 0;
2991 vm_page_lock_queues();
2992 if (! queue_empty(&vm_page_queue_throttled)) {
2993 /*
2994 * Switch "throttled" pages to "active".
2995 */
2996 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
2997 VM_PAGE_CHECK(m);
2998 assert(m->throttled);
2999 assert(!m->active);
3000 assert(!m->inactive);
3001 assert(!m->speculative);
3002 assert(!VM_PAGE_WIRED(m));
3003
3004 extra_active_count++;
3005
3006 m->throttled = FALSE;
3007 m->active = TRUE;
3008 VM_PAGE_CHECK(m);
3009 }
3010
3011 /*
3012 * Transfer the entire throttled queue to the regular LRU page queues.
3013 * We insert it at the head of the active queue, so that these pages
3014 * get re-evaluated by the LRU algorithm first, since they've been
3015 * completely out of it until now.
3016 */
3017 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3018 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3019 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3020 if (queue_empty(&vm_page_queue_active)) {
3021 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3022 } else {
3023 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3024 }
3025 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3026 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3027 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3028
3029 #if DEBUG
3030 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3031 #endif
3032 queue_init(&vm_page_queue_throttled);
3033 /*
3034 * Adjust the global page counts.
3035 */
3036 vm_page_active_count += extra_active_count;
3037 vm_page_throttled_count = 0;
3038 }
3039 assert(vm_page_throttled_count == 0);
3040 assert(queue_empty(&vm_page_queue_throttled));
3041 vm_page_unlock_queues();
3042 }
3043
3044
3045 /*
3046 * move pages from the indicated local queue to the global active queue
3047 * it's OK to fail if we're below the hard limit and force == FALSE
3048 * the nolocks == TRUE case is to allow this function to be run on
3049 * the hibernate path
3050 */
3051
3052 void
3053 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3054 {
3055 struct vpl *lq;
3056 vm_page_t first_local, last_local;
3057 vm_page_t first_active;
3058 vm_page_t m;
3059 uint32_t count = 0;
3060
3061 if (vm_page_local_q == NULL)
3062 return;
3063
3064 lq = &vm_page_local_q[lid].vpl_un.vpl;
3065
3066 if (nolocks == FALSE) {
3067 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3068 if ( !vm_page_trylockspin_queues())
3069 return;
3070 } else
3071 vm_page_lockspin_queues();
3072
3073 VPL_LOCK(&lq->vpl_lock);
3074 }
3075 if (lq->vpl_count) {
3076 /*
3077 * Switch "local" pages to "active".
3078 */
3079 assert(!queue_empty(&lq->vpl_queue));
3080
3081 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3082 VM_PAGE_CHECK(m);
3083 assert(m->local);
3084 assert(!m->active);
3085 assert(!m->inactive);
3086 assert(!m->speculative);
3087 assert(!VM_PAGE_WIRED(m));
3088 assert(!m->throttled);
3089 assert(!m->fictitious);
3090
3091 if (m->local_id != lid)
3092 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3093
3094 m->local_id = 0;
3095 m->local = FALSE;
3096 m->active = TRUE;
3097 VM_PAGE_CHECK(m);
3098
3099 count++;
3100 }
3101 if (count != lq->vpl_count)
3102 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3103
3104 /*
3105 * Transfer the entire local queue to the regular LRU page queues.
3106 */
3107 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3108 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3109 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3110
3111 if (queue_empty(&vm_page_queue_active)) {
3112 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3113 } else {
3114 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3115 }
3116 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3117 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3118 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3119
3120 queue_init(&lq->vpl_queue);
3121 /*
3122 * Adjust the global page counts.
3123 */
3124 vm_page_active_count += lq->vpl_count;
3125 lq->vpl_count = 0;
3126 }
3127 assert(queue_empty(&lq->vpl_queue));
3128
3129 if (nolocks == FALSE) {
3130 VPL_UNLOCK(&lq->vpl_lock);
3131 vm_page_unlock_queues();
3132 }
3133 }
3134
3135 /*
3136 * vm_page_part_zero_fill:
3137 *
3138 * Zero-fill a part of the page.
3139 */
3140 void
3141 vm_page_part_zero_fill(
3142 vm_page_t m,
3143 vm_offset_t m_pa,
3144 vm_size_t len)
3145 {
3146 vm_page_t tmp;
3147
3148 VM_PAGE_CHECK(m);
3149 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3150 pmap_zero_part_page(m->phys_page, m_pa, len);
3151 #else
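/*
 * No pmap-level partial zero is available: emulate it by grabbing a
 * scratch page, zeroing it, copying the portions of the original page
 * that lie outside [m_pa, m_pa + len) into the scratch page, and then
 * copying the completed scratch page back over the original.
 */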
3152 while (1) {
3153 tmp = vm_page_grab();
3154 if (tmp == VM_PAGE_NULL) {
3155 vm_page_wait(THREAD_UNINT);
3156 continue;
3157 }
3158 break;
3159 }
3160 vm_page_zero_fill(tmp);
3161 if(m_pa != 0) {
3162 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3163 }
3164 if((m_pa + len) < PAGE_SIZE) {
3165 vm_page_part_copy(m, m_pa + len, tmp,
3166 m_pa + len, PAGE_SIZE - (m_pa + len));
3167 }
3168 vm_page_copy(tmp,m);
3169 VM_PAGE_FREE(tmp);
3170 #endif
3171
3172 }
3173
3174 /*
3175 * vm_page_zero_fill:
3176 *
3177 * Zero-fill the specified page.
3178 */
3179 void
3180 vm_page_zero_fill(
3181 vm_page_t m)
3182 {
3183 XPR(XPR_VM_PAGE,
3184 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3185 m->object, m->offset, m, 0,0);
3186
3187 VM_PAGE_CHECK(m);
3188
3189 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3190 pmap_zero_page(m->phys_page);
3191 }
3192
3193 /*
3194 * vm_page_part_copy:
3195 *
3196 * copy part of one page to another
3197 */
3198
3199 void
3200 vm_page_part_copy(
3201 vm_page_t src_m,
3202 vm_offset_t src_pa,
3203 vm_page_t dst_m,
3204 vm_offset_t dst_pa,
3205 vm_size_t len)
3206 {
3207 VM_PAGE_CHECK(src_m);
3208 VM_PAGE_CHECK(dst_m);
3209
3210 pmap_copy_part_page(src_m->phys_page, src_pa,
3211 dst_m->phys_page, dst_pa, len);
3212 }
3213
3214 /*
3215 * vm_page_copy:
3216 *
3217 * Copy one page to another
3218 *
3219 * ENCRYPTED SWAP:
3220 * The source page should not be encrypted. The caller should
3221 * make sure the page is decrypted first, if necessary.
3222 */
3223
3224 int vm_page_copy_cs_validations = 0;
3225 int vm_page_copy_cs_tainted = 0;
3226
3227 void
3228 vm_page_copy(
3229 vm_page_t src_m,
3230 vm_page_t dest_m)
3231 {
3232 XPR(XPR_VM_PAGE,
3233 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3234 src_m->object, src_m->offset,
3235 dest_m->object, dest_m->offset,
3236 0);
3237
3238 VM_PAGE_CHECK(src_m);
3239 VM_PAGE_CHECK(dest_m);
3240
3241 /*
3242 * ENCRYPTED SWAP:
3243 * The source page should not be encrypted at this point.
3244 * The destination page will therefore not contain encrypted
3245 * data after the copy.
3246 */
3247 if (src_m->encrypted) {
3248 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3249 }
3250 dest_m->encrypted = FALSE;
3251
3252 if (src_m->object != VM_OBJECT_NULL &&
3253 src_m->object->code_signed) {
3254 /*
3255 * We're copying a page from a code-signed object.
3256 * Whoever ends up mapping the copy page might care about
3257 * the original page's integrity, so let's validate the
3258 * source page now.
3259 */
3260 vm_page_copy_cs_validations++;
3261 vm_page_validate_cs(src_m);
3262 }
3263
3264 if (vm_page_is_slideable(src_m)) {
3265 boolean_t was_busy = src_m->busy;
3266 src_m->busy = TRUE;
3267 (void) vm_page_slide(src_m, 0);
3268 assert(src_m->busy);
3269 if(!was_busy) {
3270 PAGE_WAKEUP_DONE(src_m);
3271 }
3272 }
3273
3274 /*
3275 * Propagate the cs_tainted bit to the copy page. Do not propagate
3276 * the cs_validated bit.
3277 */
3278 dest_m->cs_tainted = src_m->cs_tainted;
3279 if (dest_m->cs_tainted) {
3280 vm_page_copy_cs_tainted++;
3281 }
3282 dest_m->slid = src_m->slid;
3283 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3284 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3285 }
3286
3287 #if MACH_ASSERT
3288 static void
3289 _vm_page_print(
3290 vm_page_t p)
3291 {
3292 printf("vm_page %p: \n", p);
3293 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3294 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3295 printf(" next=%p\n", p->next);
3296 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3297 printf(" wire_count=%u\n", p->wire_count);
3298
3299 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3300 (p->local ? "" : "!"),
3301 (p->inactive ? "" : "!"),
3302 (p->active ? "" : "!"),
3303 (p->pageout_queue ? "" : "!"),
3304 (p->speculative ? "" : "!"),
3305 (p->laundry ? "" : "!"));
3306 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3307 (p->free ? "" : "!"),
3308 (p->reference ? "" : "!"),
3309 (p->gobbled ? "" : "!"),
3310 (p->private ? "" : "!"),
3311 (p->throttled ? "" : "!"));
3312 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3313 (p->busy ? "" : "!"),
3314 (p->wanted ? "" : "!"),
3315 (p->tabled ? "" : "!"),
3316 (p->fictitious ? "" : "!"),
3317 (p->pmapped ? "" : "!"),
3318 (p->wpmapped ? "" : "!"));
3319 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3320 (p->pageout ? "" : "!"),
3321 (p->absent ? "" : "!"),
3322 (p->error ? "" : "!"),
3323 (p->dirty ? "" : "!"),
3324 (p->cleaning ? "" : "!"),
3325 (p->precious ? "" : "!"),
3326 (p->clustered ? "" : "!"));
3327 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3328 (p->overwriting ? "" : "!"),
3329 (p->restart ? "" : "!"),
3330 (p->unusual ? "" : "!"),
3331 (p->encrypted ? "" : "!"),
3332 (p->encrypted_cleaning ? "" : "!"));
3333 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3334 (p->list_req_pending ? "" : "!"),
3335 (p->dump_cleaning ? "" : "!"),
3336 (p->cs_validated ? "" : "!"),
3337 (p->cs_tainted ? "" : "!"),
3338 (p->no_cache ? "" : "!"));
3339 printf(" %szero_fill\n",
3340 (p->zero_fill ? "" : "!"));
3341
3342 printf("phys_page=0x%x\n", p->phys_page);
3343 }
3344
3345 /*
3346 * Check that the list of pages is ordered by
3347 * ascending physical address and has no holes.
3348 */
3349 static int
3350 vm_page_verify_contiguous(
3351 vm_page_t pages,
3352 unsigned int npages)
3353 {
3354 register vm_page_t m;
3355 unsigned int page_count;
3356 vm_offset_t prev_addr;
3357
3358 prev_addr = pages->phys_page;
3359 page_count = 1;
3360 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3361 if (m->phys_page != prev_addr + 1) {
3362 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3363 m, (long)prev_addr, m->phys_page);
3364 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3365 panic("vm_page_verify_contiguous: not contiguous!");
3366 }
3367 prev_addr = m->phys_page;
3368 ++page_count;
3369 }
3370 if (page_count != npages) {
3371 printf("pages %p actual count 0x%x but requested 0x%x\n",
3372 pages, page_count, npages);
3373 panic("vm_page_verify_contiguous: count error");
3374 }
3375 return 1;
3376 }
3377
3378
3379 /*
3380 * Check the free lists for proper length etc.
3381 */
3382 static unsigned int
3383 vm_page_verify_free_list(
3384 queue_head_t *vm_page_queue,
3385 unsigned int color,
3386 vm_page_t look_for_page,
3387 boolean_t expect_page)
3388 {
3389 unsigned int npages;
3390 vm_page_t m;
3391 vm_page_t prev_m;
3392 boolean_t found_page;
3393
3394 found_page = FALSE;
3395 npages = 0;
3396 prev_m = (vm_page_t) vm_page_queue;
3397 queue_iterate(vm_page_queue,
3398 m,
3399 vm_page_t,
3400 pageq) {
3401
3402 if (m == look_for_page) {
3403 found_page = TRUE;
3404 }
3405 if ((vm_page_t) m->pageq.prev != prev_m)
3406 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3407 color, npages, m, m->pageq.prev, prev_m);
3408 if ( ! m->busy )
3409 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3410 color, npages, m);
3411 if (color != (unsigned int) -1) {
3412 if ((m->phys_page & vm_color_mask) != color)
3413 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3414 color, npages, m, m->phys_page & vm_color_mask, color);
3415 if ( ! m->free )
3416 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3417 color, npages, m);
3418 }
3419 ++npages;
3420 prev_m = m;
3421 }
3422 if (look_for_page != VM_PAGE_NULL) {
3423 unsigned int other_color;
3424
3425 if (expect_page && !found_page) {
3426 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3427 color, npages, look_for_page, look_for_page->phys_page);
3428 _vm_page_print(look_for_page);
3429 for (other_color = 0;
3430 other_color < vm_colors;
3431 other_color++) {
3432 if (other_color == color)
3433 continue;
3434 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3435 other_color, look_for_page, FALSE);
3436 }
3437 if (color == (unsigned int) -1) {
3438 vm_page_verify_free_list(&vm_lopage_queue_free,
3439 (unsigned int) -1, look_for_page, FALSE);
3440 }
3441 panic("vm_page_verify_free_list(color=%u)\n", color);
3442 }
3443 if (!expect_page && found_page) {
3444 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3445 color, npages, look_for_page, look_for_page->phys_page);
3446 }
3447 }
3448 return npages;
3449 }
3450
3451 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3452 static void
3453 vm_page_verify_free_lists( void )
3454 {
3455 unsigned int color, npages, nlopages;
3456
3457 if (! vm_page_verify_free_lists_enabled)
3458 return;
3459
3460 npages = 0;
3461
3462 lck_mtx_lock(&vm_page_queue_free_lock);
3463
3464 for( color = 0; color < vm_colors; color++ ) {
3465 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3466 color, VM_PAGE_NULL, FALSE);
3467 }
3468 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3469 (unsigned int) -1,
3470 VM_PAGE_NULL, FALSE);
3471 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3472 panic("vm_page_verify_free_lists: "
3473 "npages %u free_count %d nlopages %u lo_free_count %u",
3474 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3475
3476 lck_mtx_unlock(&vm_page_queue_free_lock);
3477 }
3478
3479 void
3480 vm_page_queues_assert(
3481 vm_page_t mem,
3482 int val)
3483 {
3484 if (mem->free + mem->active + mem->inactive + mem->speculative +
3485 mem->throttled + mem->pageout_queue > (val)) {
3486 _vm_page_print(mem);
3487 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3488 }
3489 if (VM_PAGE_WIRED(mem)) {
3490 assert(!mem->active);
3491 assert(!mem->inactive);
3492 assert(!mem->speculative);
3493 assert(!mem->throttled);
3494 }
3495 }
3496 #endif /* MACH_ASSERT */
3497
3498
3499 /*
3500 * CONTIGUOUS PAGE ALLOCATION
3501 *
3502 * Find a region large enough to contain at least n pages
3503 * of contiguous physical memory.
3504 *
3505 * This is done by traversing the vm_page_t array in a linear fashion...
3506 * we assume that the vm_page_t array has the available physical pages in an
3507 * ordered, ascending list... this is currently true of all our implementations
3508 * and must remain so... there can be 'holes' in the array... we also can
3509 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3510 * which used to happen via 'vm_page_convert'... that function was no longer
3511 * being called and was removed...
3512 *
3513 * The basic flow consists of stabilizing some of the interesting state of
3514 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3515 * sweep at the beginning of the array looking for pages that meet our criteria
3516 * for a 'stealable' page... currently we are pretty conservative... if the page
3517 * meets these criteria and is physically contiguous to the previous page in the 'run'
3518 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3519 * and start to develop a new run... if at this point we've already considered
3520 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3521 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3522 * to other threads trying to acquire free pages (or move pages from q to q),
3523 * and then continue from the spot we left off... we only make 1 pass through the
3524 * array. Once we have a 'run' that is long enough, we'll go into the loop
3525 * which steals the pages from the queues they're currently on... pages on the free
3526 * queue can be stolen directly... pages that are on any of the other queues
3527 * must be removed from the object they are tabled on... this requires taking the
3528 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3529 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3530 * dump the pages we've currently stolen back to the free list, and pick up our
3531 * scan from the point where we aborted the 'current' run.
3532 *
3533 *
3534 * Requirements:
3535 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3536 *
3537 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3538 *
3539 * Algorithm:
3540 */
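/*
 * Rough shape of the scan below (an illustrative sketch, not a second
 * implementation):
 *
 *	for (page_idx = last_idx; npages < contig_pages; page_idx++) {
 *		if (page is unstealable, or not contiguous with the run)
 *			RESET_STATE_OF_RUN();
 *		else
 *			extend the current run;
 *		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
 *			drop both locks, mutex_pause(), retake the locks,
 *			RESET_STATE_OF_RUN();
 *		}
 *	}
 */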
3541
3542 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3543
3544
3545 #define RESET_STATE_OF_RUN() \
3546 MACRO_BEGIN \
3547 prevcontaddr = -2; \
3548 start_pnum = -1; \
3549 free_considered = 0; \
3550 substitute_needed = 0; \
3551 npages = 0; \
3552 MACRO_END
3553
3554 /*
3555 * Can we steal in-use (i.e. not free) pages when searching for
3556 * physically-contiguous pages?
3557 */
3558 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3559
3560 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3561 #if DEBUG
3562 int vm_page_find_contig_debug = 0;
3563 #endif
3564
3565 static vm_page_t
3566 vm_page_find_contiguous(
3567 unsigned int contig_pages,
3568 ppnum_t max_pnum,
3569 ppnum_t pnum_mask,
3570 boolean_t wire,
3571 int flags)
3572 {
3573 vm_page_t m = NULL;
3574 ppnum_t prevcontaddr;
3575 ppnum_t start_pnum;
3576 unsigned int npages, considered, scanned;
3577 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3578 unsigned int idx_last_contig_page_found = 0;
3579 int free_considered, free_available;
3580 int substitute_needed;
3581 boolean_t wrapped;
3582 #if DEBUG
3583 clock_sec_t tv_start_sec, tv_end_sec;
3584 clock_usec_t tv_start_usec, tv_end_usec;
3585 #endif
3586 #if MACH_ASSERT
3587 int yielded = 0;
3588 int dumped_run = 0;
3589 int stolen_pages = 0;
3590 #endif
3591
3592 if (contig_pages == 0)
3593 return VM_PAGE_NULL;
3594
3595 #if MACH_ASSERT
3596 vm_page_verify_free_lists();
3597 #endif
3598 #if DEBUG
3599 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3600 #endif
3601 vm_page_lock_queues();
3602 lck_mtx_lock(&vm_page_queue_free_lock);
3603
3604 RESET_STATE_OF_RUN();
3605
3606 scanned = 0;
3607 considered = 0;
3608 free_available = vm_page_free_count - vm_page_free_reserved;
3609
3610 wrapped = FALSE;
3611
3612 if(flags & KMA_LOMEM)
3613 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3614 else
3615 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3616
3617 orig_last_idx = idx_last_contig_page_found;
3618 last_idx = orig_last_idx;
3619
3620 for (page_idx = last_idx, start_idx = last_idx;
3621 npages < contig_pages && page_idx < vm_pages_count;
3622 page_idx++) {
3623 retry:
3624 if (wrapped &&
3625 npages == 0 &&
3626 page_idx >= orig_last_idx) {
3627 /*
3628 * We're back where we started and we haven't
3629 * found any suitable contiguous range. Let's
3630 * give up.
3631 */
3632 break;
3633 }
3634 scanned++;
3635 m = &vm_pages[page_idx];
3636
3637 assert(!m->fictitious);
3638 assert(!m->private);
3639
3640 if (max_pnum && m->phys_page > max_pnum) {
3641 /* no more low pages... */
3642 break;
3643 }
3644 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3645 /*
3646 * not aligned
3647 */
3648 RESET_STATE_OF_RUN();
3649
3650 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3651 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3652 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3653 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3654 m->pageout) {
3655 /*
3656 * page is in a transient state
3657 * or a state we don't want to deal
3658 * with, so don't consider it which
3659 * means starting a new run
3660 */
3661 RESET_STATE_OF_RUN();
3662
3663 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3664 /*
3665 * page needs to be on one of our queues
3666 * in order for it to be stable behind the
3667 * locks we hold at this point...
3668 * if not, don't consider it which
3669 * means starting a new run
3670 */
3671 RESET_STATE_OF_RUN();
3672
3673 } else if (!m->free && (!m->tabled || m->busy)) {
3674 /*
3675 * pages on the free list are always 'busy'
3676 * so we couldn't test for 'busy' in the check
3677 * for the transient states... pages that are
3678 * 'free' are never 'tabled', so we also couldn't
3679 * test for 'tabled'. So we check here to make
3680 * sure that a non-free page is not busy and is
3681 * tabled on an object...
3682 * if not, don't consider it which
3683 * means starting a new run
3684 */
3685 RESET_STATE_OF_RUN();
3686
3687 } else {
3688 if (m->phys_page != prevcontaddr + 1) {
3689 if ((m->phys_page & pnum_mask) != 0) {
3690 RESET_STATE_OF_RUN();
3691 goto did_consider;
3692 } else {
3693 npages = 1;
3694 start_idx = page_idx;
3695 start_pnum = m->phys_page;
3696 }
3697 } else {
3698 npages++;
3699 }
3700 prevcontaddr = m->phys_page;
3701
3702 VM_PAGE_CHECK(m);
3703 if (m->free) {
3704 free_considered++;
3705 } else {
3706 /*
3707 * This page is not free.
3708 * If we can't steal used pages,
3709 * we have to give up this run
3710 * and keep looking.
3711 * Otherwise, we might need to
3712 * move the contents of this page
3713 * into a substitute page.
3714 */
3715 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3716 if (m->pmapped || m->dirty) {
3717 substitute_needed++;
3718 }
3719 #else
3720 RESET_STATE_OF_RUN();
3721 #endif
3722 }
3723
3724 if ((free_considered + substitute_needed) > free_available) {
3725 /*
3726 * if we let this run continue
3727 * we will end up dropping the vm_page_free_count
3728 * below the reserve limit... we need to abort
3729 * this run, but we can at least re-consider this
3730 * page... thus the jump back to 'retry'
3731 */
3732 RESET_STATE_OF_RUN();
3733
3734 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3735 considered++;
3736 goto retry;
3737 }
3738 /*
3739 * free_available == 0
3740 * so can't consider any free pages... if
3741 * we went to retry in this case, we'd
3742 * get stuck looking at the same page
3743 * w/o making any forward progress...
3744 * we also want to take this path if we've already
3745 * reached our limit that controls the lock latency
3746 */
3747 }
3748 }
3749 did_consider:
3750 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3751
3752 lck_mtx_unlock(&vm_page_queue_free_lock);
3753 vm_page_unlock_queues();
3754
3755 mutex_pause(0);
3756
3757 vm_page_lock_queues();
3758 lck_mtx_lock(&vm_page_queue_free_lock);
3759
3760 RESET_STATE_OF_RUN();
3761 /*
3762 * reset our free page limit since we
3763 * dropped the lock protecting the vm_page_free_queue
3764 */
3765 free_available = vm_page_free_count - vm_page_free_reserved;
3766 considered = 0;
3767 #if MACH_ASSERT
3768 yielded++;
3769 #endif
3770 goto retry;
3771 }
3772 considered++;
3773 }
3774 m = VM_PAGE_NULL;
3775
3776 if (npages != contig_pages) {
3777 if (!wrapped) {
3778 /*
3779 * We didn't find a contiguous range but we didn't
3780 * start from the very first page.
3781 * Start again from the very first page.
3782 */
3783 RESET_STATE_OF_RUN();
3784 if( flags & KMA_LOMEM)
3785 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3786 else
3787 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3788 last_idx = 0;
3789 page_idx = last_idx;
3790 wrapped = TRUE;
3791 goto retry;
3792 }
3793 lck_mtx_unlock(&vm_page_queue_free_lock);
3794 } else {
3795 vm_page_t m1;
3796 vm_page_t m2;
3797 unsigned int cur_idx;
3798 unsigned int tmp_start_idx;
3799 vm_object_t locked_object = VM_OBJECT_NULL;
3800 boolean_t abort_run = FALSE;
3801
3802 assert(page_idx - start_idx == contig_pages);
3803
3804 tmp_start_idx = start_idx;
3805
3806 /*
3807 * first pass through to pull the free pages
3808 * off of the free queue so that in case we
3809 * need substitute pages, we won't grab any
3810 * of the free pages in the run... we clear
3811 * the 'free' bit in this 1st pass, and even in
3812 * an abort_run case, we'll collect all of the
3813 * free pages in this run and return them to the free list
3814 */
3815 while (start_idx < page_idx) {
3816
3817 m1 = &vm_pages[start_idx++];
3818
3819 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3820 assert(m1->free);
3821 #endif
3822
3823 if (m1->free) {
3824 unsigned int color;
3825
3826 color = m1->phys_page & vm_color_mask;
3827 #if MACH_ASSERT
3828 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3829 #endif
3830 queue_remove(&vm_page_queue_free[color],
3831 m1,
3832 vm_page_t,
3833 pageq);
3834 m1->pageq.next = NULL;
3835 m1->pageq.prev = NULL;
3836 #if MACH_ASSERT
3837 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3838 #endif
3839 /*
3840 * Clear the "free" bit so that this page
3841 * does not get considered for another
3842 * concurrent physically-contiguous allocation.
3843 */
3844 m1->free = FALSE;
3845 assert(m1->busy);
3846
3847 vm_page_free_count--;
3848 }
3849 }
3850 /*
3851 * adjust global freelist counts
3852 */
3853 if (vm_page_free_count < vm_page_free_count_minimum)
3854 vm_page_free_count_minimum = vm_page_free_count;
3855
3856 if( flags & KMA_LOMEM)
3857 vm_page_lomem_find_contiguous_last_idx = page_idx;
3858 else
3859 vm_page_find_contiguous_last_idx = page_idx;
3860
3861 /*
3862 * we can drop the free queue lock at this point since
3863 * we've pulled any 'free' candidates off of the list...
3864 * we need it dropped so that we can do a vm_page_grab
3865 * when substituting for pmapped/dirty pages
3866 */
3867 lck_mtx_unlock(&vm_page_queue_free_lock);
3868
3869 start_idx = tmp_start_idx;
3870 cur_idx = page_idx - 1;
3871
3872 while (start_idx++ < page_idx) {
3873 /*
3874 * must go through the list from back to front
3875 * so that the page list is created in the
3876 * correct order - low -> high phys addresses
3877 */
3878 m1 = &vm_pages[cur_idx--];
3879
3880 assert(!m1->free);
3881 if (m1->object == VM_OBJECT_NULL) {
3882 /*
3883 * page has already been removed from
3884 * the free list in the 1st pass
3885 */
3886 assert(m1->offset == (vm_object_offset_t) -1);
3887 assert(m1->busy);
3888 assert(!m1->wanted);
3889 assert(!m1->laundry);
3890 } else {
3891 vm_object_t object;
3892
3893 if (abort_run == TRUE)
3894 continue;
3895
3896 object = m1->object;
3897
3898 if (object != locked_object) {
3899 if (locked_object) {
3900 vm_object_unlock(locked_object);
3901 locked_object = VM_OBJECT_NULL;
3902 }
3903 if (vm_object_lock_try(object))
3904 locked_object = object;
3905 }
3906 if (locked_object == VM_OBJECT_NULL ||
3907 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3908 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3909 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3910 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3911
3912 if (locked_object) {
3913 vm_object_unlock(locked_object);
3914 locked_object = VM_OBJECT_NULL;
3915 }
3916 tmp_start_idx = cur_idx;
3917 abort_run = TRUE;
3918 continue;
3919 }
3920 if (m1->pmapped || m1->dirty) {
3921 int refmod;
3922 vm_object_offset_t offset;
3923
3924 m2 = vm_page_grab();
3925
3926 if (m2 == VM_PAGE_NULL) {
3927 if (locked_object) {
3928 vm_object_unlock(locked_object);
3929 locked_object = VM_OBJECT_NULL;
3930 }
3931 tmp_start_idx = cur_idx;
3932 abort_run = TRUE;
3933 continue;
3934 }
3935 if (m1->pmapped)
3936 refmod = pmap_disconnect(m1->phys_page);
3937 else
3938 refmod = 0;
3939 vm_page_copy(m1, m2);
3940
3941 m2->reference = m1->reference;
3942 m2->dirty = m1->dirty;
3943
3944 if (refmod & VM_MEM_REFERENCED)
3945 m2->reference = TRUE;
3946 if (refmod & VM_MEM_MODIFIED)
3947 m2->dirty = TRUE;
3948 offset = m1->offset;
3949
3950 /*
3951 * completely cleans up the state
3952 * of the page so that it is ready
3953 * to be put onto the free list, or
3954 * for this purpose it looks like it
3955 * just came off of the free list
3956 */
3957 vm_page_free_prepare(m1);
3958
3959 /*
3960 * make sure we clear the ref/mod state
3961 * from the pmap layer... else we risk
3962 * inheriting state from the last time
3963 * this page was used...
3964 */
3965 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3966 /*
3967 * now put the substitute page on the object
3968 */
3969 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
3970
3971 if (m2->reference)
3972 vm_page_activate(m2);
3973 else
3974 vm_page_deactivate(m2);
3975
3976 PAGE_WAKEUP_DONE(m2);
3977
3978 } else {
3979 /*
3980 * completely cleans up the state
3981 * of the page so that it is ready
3982 * to be put onto the free list, or
3983 * for this purpose it looks like it
3984 * just came off of the free list
3985 */
3986 vm_page_free_prepare(m1);
3987 }
3988 #if MACH_ASSERT
3989 stolen_pages++;
3990 #endif
3991 }
3992 m1->pageq.next = (queue_entry_t) m;
3993 m1->pageq.prev = NULL;
3994 m = m1;
3995 }
3996 if (locked_object) {
3997 vm_object_unlock(locked_object);
3998 locked_object = VM_OBJECT_NULL;
3999 }
4000
4001 if (abort_run == TRUE) {
4002 if (m != VM_PAGE_NULL) {
4003 vm_page_free_list(m, FALSE);
4004 }
4005 #if MACH_ASSERT
4006 dumped_run++;
4007 #endif
4008 /*
4009 * tmp_start_idx was captured from cur_idx
4010 * after its auto-decrement, so it sits one
4011 * below the page that forced the abort...
4012 * add 1 to undo the auto-decrement and 1 more
4013 * to step past that page, so the scan resumes just beyond it
4014 */
4015 page_idx = tmp_start_idx + 2;
4016 if (page_idx >= vm_pages_count) {
4017 if (wrapped)
4018 goto done_scanning;
4019 page_idx = last_idx = 0;
4020 wrapped = TRUE;
4021 }
4022 abort_run = FALSE;
4023
4024 /*
4025 * The run was aborted... clear the scan state
4026 * and resume the search at the page_idx computed
4027 * above (wrapped back to 0 if we ran off the end).
4028 */
4029 RESET_STATE_OF_RUN();
4030
4031 if( flags & KMA_LOMEM)
4032 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4033 else
4034 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4035
4036 last_idx = page_idx;
4037
4038 lck_mtx_lock(&vm_page_queue_free_lock);
4039 /*
4040 * reset our free page limit since we
4041 * dropped the lock protecting the vm_page_free_queue
4042 */
4043 free_available = vm_page_free_count - vm_page_free_reserved;
4044 goto retry;
4045 }
4046
4047 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4048
4049 if (wire == TRUE)
4050 m1->wire_count++;
4051 else
4052 m1->gobbled = TRUE;
4053 }
4054 if (wire == FALSE)
4055 vm_page_gobble_count += npages;
4056
4057 /*
4058 * gobbled pages are also counted as wired pages
4059 */
4060 vm_page_wire_count += npages;
4061
4062 assert(vm_page_verify_contiguous(m, npages));
4063 }
4064 done_scanning:
4065 vm_page_unlock_queues();
4066
4067 #if DEBUG
4068 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4069
4070 tv_end_sec -= tv_start_sec;
4071 if (tv_end_usec < tv_start_usec) {
4072 tv_end_sec--;
4073 tv_end_usec += 1000000;
4074 }
4075 tv_end_usec -= tv_start_usec;
4076 if (tv_end_usec >= 1000000) {
4077 tv_end_sec++;
4078 tv_end_usec -= 1000000;
4079 }
4080 if (vm_page_find_contig_debug) {
4081 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4082 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4083 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4084 scanned, yielded, dumped_run, stolen_pages);
4085 }
4086
4087 #endif
4088 #if MACH_ASSERT
4089 vm_page_verify_free_lists();
4090 #endif
4091 return m;
4092 }
4093
4094 /*
4095 * Allocate a list of contiguous, wired pages.
4096 */
4097 kern_return_t
4098 cpm_allocate(
4099 vm_size_t size,
4100 vm_page_t *list,
4101 ppnum_t max_pnum,
4102 ppnum_t pnum_mask,
4103 boolean_t wire,
4104 int flags)
4105 {
4106 vm_page_t pages;
4107 unsigned int npages;
4108
4109 if (size % PAGE_SIZE != 0)
4110 return KERN_INVALID_ARGUMENT;
4111
4112 npages = (unsigned int) (size / PAGE_SIZE);
4113 if (npages != size / PAGE_SIZE) {
4114 /* 32-bit overflow */
4115 return KERN_INVALID_ARGUMENT;
4116 }
4117
4118 /*
4119 * Obtain a pointer to a subset of the free
4120 * list large enough to satisfy the request;
4121 * the region will be physically contiguous.
4122 */
4123 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4124
4125 if (pages == VM_PAGE_NULL)
4126 return KERN_NO_SPACE;
4127 /*
4128 * determine need for wakeups
4129 */
4130 if ((vm_page_free_count < vm_page_free_min) ||
4131 ((vm_page_free_count < vm_page_free_target) &&
4132 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4133 thread_wakeup((event_t) &vm_page_free_wanted);
4134
4135 VM_CHECK_MEMORYSTATUS;
4136
4137 /*
4138 * The CPM pages should now be available and
4139 * ordered by ascending physical address.
4140 */
4141 assert(vm_page_verify_contiguous(pages, npages));
4142
4143 *list = pages;
4144 return KERN_SUCCESS;
4145 }
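/*
 * Illustrative sketch only (kept as a comment, not compiled): one way a
 * caller might use cpm_allocate() to back a physically contiguous buffer.
 * The local names (contig_size, page_list, p) are made up for the example;
 * passing 0 for max_pnum and pnum_mask is meant here as "no physical
 * address constraint" -- see vm_page_find_contiguous() for how those
 * arguments are actually interpreted in this version.
 *
 *	vm_size_t	contig_size = 16 * PAGE_SIZE;
 *	vm_page_t	page_list;
 *	vm_page_t	p;
 *
 *	if (cpm_allocate(contig_size, &page_list, 0, 0, TRUE, 0) == KERN_SUCCESS) {
 *		// pages come back wired (wire == TRUE), linked through
 *		// pageq.next and ordered by ascending physical address
 *		for (p = page_list; p != VM_PAGE_NULL; p = NEXT_PAGE(p))
 *			printf("ppnum 0x%x\n", vm_page_get_phys_page(p));
 *	}
 */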
4146
4147
4148 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4149
4150 /*
4151 * when working on a 'run' of pages, it is necessary to hold
4152 * the vm_page_queue_lock (a hot global lock) for certain operations
4153 * on the page... however, the majority of the work can be done
4154 * while merely holding the object lock... in fact there are certain
4155 * collections of pages that don't require any work brokered by the
4156 * vm_page_queue_lock at all... to mitigate the time spent behind the
4157 * global lock, we use a 2-pass algorithm... collect up to DELAYED_WORK_LIMIT
4158 * pages while doing all of the work that doesn't require the vm_page_queue_lock...
4159 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4160 * remaining work for each page... we grab the busy bit on the page
4161 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4162 * when it can't immediately take the vm_page_queue_lock... this lets it compete
4163 * for the locks in the same order that vm_pageout_scan takes them.
4164 * the DW_* operation names are modeled after the routines they stand in
4165 * for, so the intent of each change stays obvious when reading the
4166 * original loop
4167 */
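/*
 * Illustrative sketch only (kept as a comment, not compiled): one way a
 * caller can batch pages for vm_page_do_delayed_work().  The dw_array,
 * dw_limit and dw_count names below are assumptions made for the example;
 * only struct vm_page_delayed_work, its dw_m / dw_mask fields, the DW_*
 * masks and DEFAULT_DELAYED_WORK_LIMIT come from the surrounding code.
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *	int				dw_limit = DEFAULT_DELAYED_WORK_LIMIT;
 *
 *	// with the object lock held, do the per-page work that doesn't
 *	// need the vm_page_queue_lock, then record the queue-level work
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_activate | DW_clear_busy | DW_PAGE_WAKEUP;
 *	dwp++;
 *	if (++dw_count >= dw_limit) {
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 *	...
 *	if (dw_count)
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 */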
4168
4169 void
4170 vm_page_do_delayed_work(
4171 vm_object_t object,
4172 struct vm_page_delayed_work *dwp,
4173 int dw_count)
4174 {
4175 int j;
4176 vm_page_t m;
4177 vm_page_t local_free_q = VM_PAGE_NULL;
4178 boolean_t dropped_obj_lock = FALSE;
4179
4180 /*
4181 * pageout_scan takes the vm_page_lock_queues first
4182 * then tries for the object lock... to avoid what
4183 * is effectively a lock inversion, we'll go to the
4184 * trouble of taking them in that same order... otherwise
4185 * if this object contains the majority of the pages resident
4186 * in the UBC (or a small set of large objects actively being
4187 * worked on contain the majority of the pages), we could
4188 * cause the pageout_scan thread to 'starve' in its attempt
4189 * to find pages to move to the free queue, since it has to
4190 * successfully acquire the object lock of any candidate page
4191 * before it can steal/clean it.
4192 */
4193 if (!vm_page_trylockspin_queues()) {
4194 vm_object_unlock(object);
4195
4196 vm_page_lockspin_queues();
4197
4198 for (j = 0; ; j++) {
4199 if (!vm_object_lock_avoid(object) &&
4200 _vm_object_lock_try(object))
4201 break;
4202 vm_page_unlock_queues();
4203 mutex_pause(j);
4204 vm_page_lockspin_queues();
4205 }
4206 dropped_obj_lock = TRUE;
4207 }
4208 for (j = 0; j < dw_count; j++, dwp++) {
4209
4210 m = dwp->dw_m;
4211
4212 if (dwp->dw_mask & DW_set_list_req_pending) {
4213 m->list_req_pending = TRUE;
4214
4215 if (dropped_obj_lock == TRUE) {
4216 /*
4217 * need to make sure anyone that might have
4218 * blocked on busy == TRUE when we dropped
4219 * the object lock gets a chance to re-evaluate
4220 * its state since we have several places
4221 * where we avoid potential deadlocks with
4222 * the filesystem by stealing pages with
4223 * list_req_pending == TRUE and busy == TRUE
4224 */
4225 dwp->dw_mask |= DW_PAGE_WAKEUP;
4226 }
4227 }
4228 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4229 vm_pageout_throttle_up(m);
4230
4231 if (dwp->dw_mask & DW_vm_page_wire)
4232 vm_page_wire(m);
4233 else if (dwp->dw_mask & DW_vm_page_unwire) {
4234 boolean_t queueit;
4235
4236 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4237
4238 vm_page_unwire(m, queueit);
4239 }
4240 if (dwp->dw_mask & DW_vm_page_free) {
4241 vm_page_free_prepare_queues(m);
4242
4243 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4244 /*
4245 * Add this page to our list of reclaimed pages,
4246 * to be freed later.
4247 */
4248 m->pageq.next = (queue_entry_t) local_free_q;
4249 local_free_q = m;
4250 } else {
4251 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4252 vm_page_deactivate_internal(m, FALSE);
4253 else if (dwp->dw_mask & DW_vm_page_activate) {
4254 if (m->active == FALSE) {
4255 vm_page_activate(m);
4256 }
4257 }
4258 else if (dwp->dw_mask & DW_vm_page_speculate)
4259 vm_page_speculate(m, TRUE);
4260 else if (dwp->dw_mask & DW_vm_page_lru)
4261 vm_page_lru(m);
4262 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
4263 VM_PAGE_QUEUES_REMOVE(m);
4264
4265 if (dwp->dw_mask & DW_set_reference)
4266 m->reference = TRUE;
4267 else if (dwp->dw_mask & DW_clear_reference)
4268 m->reference = FALSE;
4269
4270 if (dwp->dw_mask & DW_move_page) {
4271 VM_PAGE_QUEUES_REMOVE(m);
4272
4273 assert(!m->laundry);
4274 assert(m->object != kernel_object);
4275 assert(m->pageq.next == NULL &&
4276 m->pageq.prev == NULL);
4277
4278 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4279 }
4280 if (dwp->dw_mask & DW_clear_busy)
4281 m->busy = FALSE;
4282
4283 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4284 PAGE_WAKEUP(m);
4285 }
4286 }
4287 vm_page_unlock_queues();
4288
4289 if (local_free_q)
4290 vm_page_free_list(local_free_q, TRUE);
4291
4292 VM_CHECK_MEMORYSTATUS;
4293
4294 }
4295
4296
4297
4298
4299 void vm_check_memorystatus(void)
4300 {
4301 #if CONFIG_EMBEDDED
4302 static boolean_t in_critical = FALSE;
4303 static unsigned int last_memorystatus = 0;
4304 unsigned int pages_avail;
4305
4306 if (!kern_memorystatus_delta) {
4307 return;
4308 }
4309
4310 pages_avail = (vm_page_active_count +
4311 vm_page_inactive_count +
4312 vm_page_speculative_count +
4313 vm_page_free_count +
4314 (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
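/*
 * notify the memorystatus thread when we first drop below the
 * delta (entering the critical range), or whenever pages_avail
 * has moved up or down by at least kern_memorystatus_delta
 * since the last notification
 */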
4315 if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
4316 (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
4317 (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
4318 kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
4319 last_memorystatus = pages_avail;
4320
4321 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4322
4323 in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
4324 }
4325 #endif
4326 }
4327
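/*
 * Grab page_count low-memory pages (KMA_LOMEM is required) and return
 * them as a list linked through pageq.next, with the most recently
 * grabbed page at the head.  On a shortage, everything grabbed so far
 * is returned to the free list and KERN_RESOURCE_SHORTAGE is reported.
 */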
4328 kern_return_t
4329 vm_page_alloc_list(
4330 int page_count,
4331 int flags,
4332 vm_page_t *list)
4333 {
4334 vm_page_t lo_page_list = VM_PAGE_NULL;
4335 vm_page_t mem;
4336 int i;
4337
4338 if ( !(flags & KMA_LOMEM))
4339 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4340
4341 for (i = 0; i < page_count; i++) {
4342
4343 mem = vm_page_grablo();
4344
4345 if (mem == VM_PAGE_NULL) {
4346 if (lo_page_list)
4347 vm_page_free_list(lo_page_list, FALSE);
4348
4349 *list = VM_PAGE_NULL;
4350
4351 return (KERN_RESOURCE_SHORTAGE);
4352 }
4353 mem->pageq.next = (queue_entry_t) lo_page_list;
4354 lo_page_list = mem;
4355 }
4356 *list = lo_page_list;
4357
4358 return (KERN_SUCCESS);
4359 }
4360
4361 void
4362 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4363 {
4364 page->offset = offset;
4365 }
4366
4367 vm_page_t
4368 vm_page_get_next(vm_page_t page)
4369 {
4370 return ((vm_page_t) page->pageq.next);
4371 }
4372
4373 vm_object_offset_t
4374 vm_page_get_offset(vm_page_t page)
4375 {
4376 return (page->offset);
4377 }
4378
4379 ppnum_t
4380 vm_page_get_phys_page(vm_page_t page)
4381 {
4382 return (page->phys_page);
4383 }
4384
4385
4386 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4387
4388 #if HIBERNATION
4389
4390 static vm_page_t hibernate_gobble_queue;
4391
4392 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4393
4394 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4395 static int hibernate_flush_dirty_pages(void);
4396 static int hibernate_flush_queue(queue_head_t *, int);
4397 static void hibernate_dirty_page(vm_page_t);
4398
4399 void hibernate_flush_wait(void);
4400 void hibernate_mark_in_progress(void);
4401 void hibernate_clear_in_progress(void);
4402
4403
4404 struct hibernate_statistics {
4405 int hibernate_considered;
4406 int hibernate_reentered_on_q;
4407 int hibernate_found_dirty;
4408 int hibernate_skipped_cleaning;
4409 int hibernate_skipped_transient;
4410 int hibernate_skipped_precious;
4411 int hibernate_queue_nolock;
4412 int hibernate_queue_paused;
4413 int hibernate_throttled;
4414 int hibernate_throttle_timeout;
4415 int hibernate_drained;
4416 int hibernate_drain_timeout;
4417 int cd_lock_failed;
4418 int cd_found_precious;
4419 int cd_found_wired;
4420 int cd_found_busy;
4421 int cd_found_unusual;
4422 int cd_found_cleaning;
4423 int cd_found_laundry;
4424 int cd_found_dirty;
4425 int cd_local_free;
4426 int cd_total_free;
4427 int cd_vm_page_wire_count;
4428 int cd_pages;
4429 int cd_discarded;
4430 int cd_count_wire;
4431 } hibernate_stats;
4432
4433
4434
4435 static int
4436 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4437 {
4438 wait_result_t wait_result;
4439
4440 vm_page_lock_queues();
4441
4442 while (q->pgo_laundry) {
4443
4444 q->pgo_draining = TRUE;
4445
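/*
 * wait (up to 5 seconds per pass) for the laundry on this queue
 * to finish... the drain wakeup is posted on (&q->pgo_laundry + 1),
 * a distinct event from the throttle wait on &q->pgo_laundry itself
 */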
4446 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4447
4448 vm_page_unlock_queues();
4449
4450 wait_result = thread_block(THREAD_CONTINUE_NULL);
4451
4452 if (wait_result == THREAD_TIMED_OUT) {
4453 hibernate_stats.hibernate_drain_timeout++;
4454 return (1);
4455 }
4456 vm_page_lock_queues();
4457
4458 hibernate_stats.hibernate_drained++;
4459 }
4460 vm_page_unlock_queues();
4461
4462 return (0);
4463 }
4464
4465 static void
4466 hibernate_dirty_page(vm_page_t m)
4467 {
4468 vm_object_t object = m->object;
4469 struct vm_pageout_queue *q;
4470
4471 #if DEBUG
4472 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4473 #endif
4474 vm_object_lock_assert_exclusive(object);
4475
4476 /*
4477 * protect the object from collapse -
4478 * locking in the object's paging_offset.
4479 */
4480 vm_object_paging_begin(object);
4481
4482 m->list_req_pending = TRUE;
4483 m->cleaning = TRUE;
4484 m->busy = TRUE;
4485
4486 if (object->internal == TRUE)
4487 q = &vm_pageout_queue_internal;
4488 else
4489 q = &vm_pageout_queue_external;
4490
4491 /*
4492 * pgo_laundry count is tied to the laundry bit
4493 */
4494 m->laundry = TRUE;
4495 q->pgo_laundry++;
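/*
 * hibernate_drain_pageout_queue() later waits for pgo_laundry
 * to drop back to zero once the pageout thread has processed
 * these pages
 */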
4496
4497 m->pageout_queue = TRUE;
4498 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4499
4500 if (q->pgo_idle == TRUE) {
4501 q->pgo_idle = FALSE;
4502 thread_wakeup((event_t) &q->pgo_pending);
4503 }
4504 }
4505
4506 static int
4507 hibernate_flush_queue(queue_head_t *q, int qcount)
4508 {
4509 vm_page_t m;
4510 vm_object_t l_object = NULL;
4511 vm_object_t m_object = NULL;
4512 int refmod_state = 0;
4513 int try_failed_count = 0;
4514 int retval = 0;
4515 int current_run = 0;
4516 struct vm_pageout_queue *iq;
4517 struct vm_pageout_queue *eq;
4518 struct vm_pageout_queue *tq;
4519
4520
4521 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4522
4523 iq = &vm_pageout_queue_internal;
4524 eq = &vm_pageout_queue_external;
4525
4526 vm_page_lock_queues();
4527
4528 while (qcount && !queue_empty(q)) {
4529
4530 if (current_run++ == 1000) {
4531 if (hibernate_should_abort()) {
4532 retval = 1;
4533 break;
4534 }
4535 current_run = 0;
4536 }
4537
4538 m = (vm_page_t) queue_first(q);
4539 m_object = m->object;
4540
4541 /*
4542 * check to see if we currently are working
4543 * with the same object... if so, we've
4544 * already got the lock
4545 */
4546 if (m_object != l_object) {
4547 /*
4548 * the object associated with candidate page is
4549 * different from the one we were just working
4550 * with... dump the lock if we still own it
4551 */
4552 if (l_object != NULL) {
4553 vm_object_unlock(l_object);
4554 l_object = NULL;
4555 }
4556 /*
4557 * Try to lock object; since we've already got the
4558 * page queues lock, we can only 'try' for this one.
4559 * If the 'try' fails, we need to do a mutex_pause
4560 * to allow the owner of the object lock a chance to
4561 * run...
4562 */
4563 if ( !vm_object_lock_try_scan(m_object)) {
4564
4565 if (try_failed_count > 20) {
4566 hibernate_stats.hibernate_queue_nolock++;
4567
4568 goto reenter_pg_on_q;
4569 }
4570 vm_pageout_scan_wants_object = m_object;
4571
4572 vm_page_unlock_queues();
4573 mutex_pause(try_failed_count++);
4574 vm_page_lock_queues();
4575
4576 hibernate_stats.hibernate_queue_paused++;
4577 continue;
4578 } else {
4579 l_object = m_object;
4580 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4581 }
4582 }
4583 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4584 /*
4585 * page is not to be cleaned
4586 * put it back on the head of its queue
4587 */
4588 if (m->cleaning)
4589 hibernate_stats.hibernate_skipped_cleaning++;
4590 else
4591 hibernate_stats.hibernate_skipped_transient++;
4592
4593 goto reenter_pg_on_q;
4594 }
4595 if ( !m_object->pager_initialized && m_object->pager_created)
4596 goto reenter_pg_on_q;
4597
4598 if (m_object->copy == VM_OBJECT_NULL) {
4599 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4600 /*
4601 * let the normal hibernate image path
4602 * deal with these
4603 */
4604 goto reenter_pg_on_q;
4605 }
4606 }
4607 if ( !m->dirty && m->pmapped) {
4608 refmod_state = pmap_get_refmod(m->phys_page);
4609
4610 if ((refmod_state & VM_MEM_MODIFIED))
4611 m->dirty = TRUE;
4612 } else
4613 refmod_state = 0;
4614
4615 if ( !m->dirty) {
4616 /*
4617 * page is not to be cleaned
4618 * put it back on the head of its queue
4619 */
4620 if (m->precious)
4621 hibernate_stats.hibernate_skipped_precious++;
4622
4623 goto reenter_pg_on_q;
4624 }
4625 tq = NULL;
4626
4627 if (m_object->internal) {
4628 if (VM_PAGE_Q_THROTTLED(iq))
4629 tq = iq;
4630 } else if (VM_PAGE_Q_THROTTLED(eq))
4631 tq = eq;
4632
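/*
 * the queue this page would go to is throttled... drop the object
 * lock and wait (up to 5 one-second intervals) for its laundry to
 * drain before retrying this page or giving up
 */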
4633 if (tq != NULL) {
4634 wait_result_t wait_result;
4635 int wait_count = 5;
4636
4637 if (l_object != NULL) {
4638 vm_object_unlock(l_object);
4639 l_object = NULL;
4640 }
4641 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4642
4643 tq->pgo_throttled = TRUE;
4644
4645 while (retval == 0) {
4646
4647 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4648
4649 vm_page_unlock_queues();
4650
4651 wait_result = thread_block(THREAD_CONTINUE_NULL);
4652
4653 vm_page_lock_queues();
4654
4655 if (hibernate_should_abort())
4656 retval = 1;
4657
4658 if (wait_result != THREAD_TIMED_OUT)
4659 break;
4660
4661 if (--wait_count == 0) {
4662 hibernate_stats.hibernate_throttle_timeout++;
4663 retval = 1;
4664 }
4665 }
4666 if (retval)
4667 break;
4668
4669 hibernate_stats.hibernate_throttled++;
4670
4671 continue;
4672 }
4673 VM_PAGE_QUEUES_REMOVE(m);
4674
4675 hibernate_dirty_page(m);
4676
4677 hibernate_stats.hibernate_found_dirty++;
4678
4679 goto next_pg;
4680
4681 reenter_pg_on_q:
4682 queue_remove(q, m, vm_page_t, pageq);
4683 queue_enter(q, m, vm_page_t, pageq);
4684
4685 hibernate_stats.hibernate_reentered_on_q++;
4686 next_pg:
4687 hibernate_stats.hibernate_considered++;
4688
4689 qcount--;
4690 try_failed_count = 0;
4691 }
4692 if (l_object != NULL) {
4693 vm_object_unlock(l_object);
4694 l_object = NULL;
4695 }
4696 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4697
4698 vm_page_unlock_queues();
4699
4700 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4701
4702 return (retval);
4703 }
4704
4705
4706 static int
4707 hibernate_flush_dirty_pages(void)
4708 {
4709 struct vm_speculative_age_q *aq;
4710 uint32_t i;
4711
4712 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4713
4714 if (vm_page_local_q) {
4715 for (i = 0; i < vm_page_local_q_count; i++)
4716 vm_page_reactivate_local(i, TRUE, FALSE);
4717 }
4718
4719 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4720 int qcount;
4721 vm_page_t m;
4722
4723 aq = &vm_page_queue_speculative[i];
4724
4725 if (queue_empty(&aq->age_q))
4726 continue;
4727 qcount = 0;
4728
4729 vm_page_lockspin_queues();
4730
4731 queue_iterate(&aq->age_q,
4732 m,
4733 vm_page_t,
4734 pageq)
4735 {
4736 qcount++;
4737 }
4738 vm_page_unlock_queues();
4739
4740 if (qcount) {
4741 if (hibernate_flush_queue(&aq->age_q, qcount))
4742 return (1);
4743 }
4744 }
4745 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4746 return (1);
4747 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4748 return (1);
4749 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4750 return (1);
4751
4752 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4753 return (1);
4754 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4755 }
4756
4757
4758 extern void IOSleep(unsigned int);
4759 extern int sync_internal(void);
4760
4761 int
4762 hibernate_flush_memory(void)
4763 {
4764 int retval;
4765
4766 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4767
4768 IOSleep(2 * 1000);
4769
4770 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4771
4772 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4773 if (consider_buffer_cache_collect != NULL) {
4774
4775 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4776
4777 sync_internal();
4778 (void)(*consider_buffer_cache_collect)(1);
4779 consider_zone_gc(1);
4780
4781 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4782 }
4783 }
4784 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4785
4786 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4787 hibernate_stats.hibernate_considered,
4788 hibernate_stats.hibernate_reentered_on_q,
4789 hibernate_stats.hibernate_found_dirty);
4790 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4791 hibernate_stats.hibernate_skipped_cleaning,
4792 hibernate_stats.hibernate_skipped_transient,
4793 hibernate_stats.hibernate_skipped_precious,
4794 hibernate_stats.hibernate_queue_nolock);
4795 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4796 hibernate_stats.hibernate_queue_paused,
4797 hibernate_stats.hibernate_throttled,
4798 hibernate_stats.hibernate_throttle_timeout,
4799 hibernate_stats.hibernate_drained,
4800 hibernate_stats.hibernate_drain_timeout);
4801
4802 return (retval);
4803 }
4804
4805
4806 static void
4807 hibernate_page_list_zero(hibernate_page_list_t *list)
4808 {
4809 uint32_t bank;
4810 hibernate_bitmap_t * bitmap;
4811
4812 bitmap = &list->bank_bitmap[0];
4813 for (bank = 0; bank < list->bank_count; bank++)
4814 {
4815 uint32_t last_bit;
4816
4817 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4818 // set the out-of-bound bits at the end of the bitmap, so pages past last_page are never treated as needing to be saved.
4819 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4820 if (last_bit)
4821 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4822
4823 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4824 }
4825 }
4826
4827 void
4828 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4829 {
4830 uint32_t i;
4831 vm_page_t m;
4832 uint64_t start, end, timeout, nsec;
4833 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4834 clock_get_uptime(&start);
4835
4836 for (i = 0; i < gobble_count; i++)
4837 {
4838 while (VM_PAGE_NULL == (m = vm_page_grab()))
4839 {
4840 clock_get_uptime(&end);
4841 if (end >= timeout)
4842 break;
4843 VM_PAGE_WAIT();
4844 }
4845 if (!m)
4846 break;
4847 m->busy = FALSE;
4848 vm_page_gobble(m);
4849
4850 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4851 hibernate_gobble_queue = m;
4852 }
4853
4854 clock_get_uptime(&end);
4855 absolutetime_to_nanoseconds(end - start, &nsec);
4856 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4857 }
4858
4859 void
4860 hibernate_free_gobble_pages(void)
4861 {
4862 vm_page_t m, next;
4863 uint32_t count = 0;
4864
4865 m = (vm_page_t) hibernate_gobble_queue;
4866 while(m)
4867 {
4868 next = (vm_page_t) m->pageq.next;
4869 vm_page_free(m);
4870 count++;
4871 m = next;
4872 }
4873 hibernate_gobble_queue = VM_PAGE_NULL;
4874
4875 if (count)
4876 HIBLOG("Freed %d pages\n", count);
4877 }
4878
4879 static boolean_t
4880 hibernate_consider_discard(vm_page_t m)
4881 {
4882 vm_object_t object = NULL;
4883 int refmod_state;
4884 boolean_t discard = FALSE;
4885
4886 do
4887 {
4888 if (m->private)
4889 panic("hibernate_consider_discard: private");
4890
4891 if (!vm_object_lock_try(m->object)) {
4892 hibernate_stats.cd_lock_failed++;
4893 break;
4894 }
4895 object = m->object;
4896
4897 if (VM_PAGE_WIRED(m)) {
4898 hibernate_stats.cd_found_wired++;
4899 break;
4900 }
4901 if (m->precious) {
4902 hibernate_stats.cd_found_precious++;
4903 break;
4904 }
4905 if (m->busy || !object->alive) {
4906 /*
4907 * Somebody is playing with this page.
4908 */
4909 hibernate_stats.cd_found_busy++;
4910 break;
4911 }
4912 if (m->absent || m->unusual || m->error) {
4913 /*
4914 * If it's unusual in any way, ignore it
4915 */
4916 hibernate_stats.cd_found_unusual++;
4917 break;
4918 }
4919 if (m->cleaning) {
4920 hibernate_stats.cd_found_cleaning++;
4921 break;
4922 }
4923 if (m->laundry || m->list_req_pending) {
4924 hibernate_stats.cd_found_laundry++;
4925 break;
4926 }
4927 if (!m->dirty)
4928 {
4929 refmod_state = pmap_get_refmod(m->phys_page);
4930
4931 if (refmod_state & VM_MEM_REFERENCED)
4932 m->reference = TRUE;
4933 if (refmod_state & VM_MEM_MODIFIED)
4934 m->dirty = TRUE;
4935 }
4936
4937 /*
4938 * If it's clean or purgeable we can discard the page on wakeup.
4939 */
4940 discard = (!m->dirty)
4941 || (VM_PURGABLE_VOLATILE == object->purgable)
4942 || (VM_PURGABLE_EMPTY == object->purgable);
4943
4944 if (discard == FALSE)
4945 hibernate_stats.cd_found_dirty++;
4946 }
4947 while (FALSE);
4948
4949 if (object)
4950 vm_object_unlock(object);
4951
4952 return (discard);
4953 }
4954
4955
4956 static void
4957 hibernate_discard_page(vm_page_t m)
4958 {
4959 if (m->absent || m->unusual || m->error)
4960 /*
4961 * If it's unusual in any way, ignore it
4962 */
4963 return;
4964
4965 if (m->pmapped == TRUE)
4966 {
4967 __unused int refmod_state = pmap_disconnect(m->phys_page);
4968 }
4969
4970 if (m->laundry)
4971 panic("hibernate_discard_page(%p) laundry", m);
4972 if (m->private)
4973 panic("hibernate_discard_page(%p) private", m);
4974 if (m->fictitious)
4975 panic("hibernate_discard_page(%p) fictitious", m);
4976
4977 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4978 {
4979 /* object should be on a queue */
4980 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4981 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4982 assert(old_queue);
4983 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4984 makes sure these locks are uncontended before sleep */
4985 vm_purgeable_token_delete_first(old_queue);
4986 m->object->purgable = VM_PURGABLE_EMPTY;
4987 }
4988
4989 vm_page_free(m);
4990 }
4991
4992 /*
4993 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
4994 pages known to the VM not to need saving are then subtracted.
4995 Wired pages to be saved are represented in page_list_wired, pageable ones in page_list.
4996 */
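/*
 * In both bitmaps a set bit therefore means "do not save this page via
 * this list": free and discarded pages get their bit set in page_list
 * and page_list_wired, pageable-but-kept pages are set only in
 * page_list_wired, and pages left at zero in page_list_wired are the
 * wired pages that must be preserved.
 */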
4997
4998 void
4999 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5000 hibernate_page_list_t * page_list_wired,
5001 hibernate_page_list_t * page_list_pal,
5002 uint32_t * pagesOut)
5003 {
5004 uint64_t start, end, nsec;
5005 vm_page_t m;
5006 uint32_t pages = page_list->page_count;
5007 uint32_t count_zf = 0, count_throttled = 0;
5008 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
5009 uint32_t count_wire = pages;
5010 uint32_t count_discard_active = 0;
5011 uint32_t count_discard_inactive = 0;
5012 uint32_t count_discard_purgeable = 0;
5013 uint32_t count_discard_speculative = 0;
5014 uint32_t i;
5015 uint32_t bank;
5016 hibernate_bitmap_t * bitmap;
5017 hibernate_bitmap_t * bitmap_wired;
5018
5019
5020 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5021
5022 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5023
5024 clock_get_uptime(&start);
5025
5026 hibernate_page_list_zero(page_list);
5027 hibernate_page_list_zero(page_list_wired);
5028 hibernate_page_list_zero(page_list_pal);
5029
5030 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5031 hibernate_stats.cd_pages = pages;
5032
5033 if (vm_page_local_q) {
5034 for (i = 0; i < vm_page_local_q_count; i++)
5035 vm_page_reactivate_local(i, TRUE, TRUE);
5036 }
5037
5038 m = (vm_page_t) hibernate_gobble_queue;
5039 while(m)
5040 {
5041 pages--;
5042 count_wire--;
5043 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5044 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5045 m = (vm_page_t) m->pageq.next;
5046 }
5047
5048 for( i = 0; i < real_ncpus; i++ )
5049 {
5050 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5051 {
5052 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5053 {
5054 pages--;
5055 count_wire--;
5056 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5057 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5058
5059 hibernate_stats.cd_local_free++;
5060 hibernate_stats.cd_total_free++;
5061 }
5062 }
5063 }
5064
5065 for( i = 0; i < vm_colors; i++ )
5066 {
5067 queue_iterate(&vm_page_queue_free[i],
5068 m,
5069 vm_page_t,
5070 pageq)
5071 {
5072 pages--;
5073 count_wire--;
5074 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5075 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5076
5077 hibernate_stats.cd_total_free++;
5078 }
5079 }
5080
5081 queue_iterate(&vm_lopage_queue_free,
5082 m,
5083 vm_page_t,
5084 pageq)
5085 {
5086 pages--;
5087 count_wire--;
5088 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5089 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5090
5091 hibernate_stats.cd_total_free++;
5092 }
5093
5094 queue_iterate( &vm_page_queue_throttled,
5095 m,
5096 vm_page_t,
5097 pageq )
5098 {
5099 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5100 && hibernate_consider_discard(m))
5101 {
5102 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5103 count_discard_inactive++;
5104 }
5105 else
5106 count_throttled++;
5107 count_wire--;
5108 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5109 }
5110
5111 queue_iterate( &vm_page_queue_zf,
5112 m,
5113 vm_page_t,
5114 pageq )
5115 {
5116 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5117 && hibernate_consider_discard(m))
5118 {
5119 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5120 if (m->dirty)
5121 count_discard_purgeable++;
5122 else
5123 count_discard_inactive++;
5124 }
5125 else
5126 count_zf++;
5127 count_wire--;
5128 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5129 }
5130
5131 queue_iterate( &vm_page_queue_inactive,
5132 m,
5133 vm_page_t,
5134 pageq )
5135 {
5136 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5137 && hibernate_consider_discard(m))
5138 {
5139 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5140 if (m->dirty)
5141 count_discard_purgeable++;
5142 else
5143 count_discard_inactive++;
5144 }
5145 else
5146 count_inactive++;
5147 count_wire--;
5148 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5149 }
5150
5151 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5152 {
5153 queue_iterate(&vm_page_queue_speculative[i].age_q,
5154 m,
5155 vm_page_t,
5156 pageq)
5157 {
5158 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5159 && hibernate_consider_discard(m))
5160 {
5161 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5162 count_discard_speculative++;
5163 }
5164 else
5165 count_speculative++;
5166 count_wire--;
5167 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5168 }
5169 }
5170
5171 queue_iterate( &vm_page_queue_active,
5172 m,
5173 vm_page_t,
5174 pageq )
5175 {
5176 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5177 && hibernate_consider_discard(m))
5178 {
5179 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5180 if (m->dirty)
5181 count_discard_purgeable++;
5182 else
5183 count_discard_active++;
5184 }
5185 else
5186 count_active++;
5187 count_wire--;
5188 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5189 }
5190
5191 // pull wired from hibernate_bitmap
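// a page whose bit is still zero in page_list_wired is one that must be
// saved as wired; setting its bit in page_list here removes it from the
// pageable image so it is only written as part of the wired set.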
5192
5193 bitmap = &page_list->bank_bitmap[0];
5194 bitmap_wired = &page_list_wired->bank_bitmap[0];
5195 for (bank = 0; bank < page_list->bank_count; bank++)
5196 {
5197 for (i = 0; i < bitmap->bitmapwords; i++)
5198 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5199 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5200 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5201 }
5202
5203 // machine dependent adjustments
5204 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5205
5206 hibernate_stats.cd_count_wire = count_wire;
5207 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5208
5209 clock_get_uptime(&end);
5210 absolutetime_to_nanoseconds(end - start, &nsec);
5211 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5212
5213 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5214 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5215 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5216
5217 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5218
5219 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5220 }
5221
5222 void
5223 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5224 {
5225 uint64_t start, end, nsec;
5226 vm_page_t m;
5227 vm_page_t next;
5228 uint32_t i;
5229 uint32_t count_discard_active = 0;
5230 uint32_t count_discard_inactive = 0;
5231 uint32_t count_discard_purgeable = 0;
5232 uint32_t count_discard_speculative = 0;
5233
5234 clock_get_uptime(&start);
5235
5236 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5237 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5238 {
5239 next = (vm_page_t) m->pageq.next;
5240 if (hibernate_page_bittst(page_list, m->phys_page))
5241 {
5242 if (m->dirty)
5243 count_discard_purgeable++;
5244 else
5245 count_discard_inactive++;
5246 hibernate_discard_page(m);
5247 }
5248 m = next;
5249 }
5250
5251 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5252 {
5253 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5254 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5255 {
5256 next = (vm_page_t) m->pageq.next;
5257 if (hibernate_page_bittst(page_list, m->phys_page))
5258 {
5259 count_discard_speculative++;
5260 hibernate_discard_page(m);
5261 }
5262 m = next;
5263 }
5264 }
5265
5266 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5267 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5268 {
5269 next = (vm_page_t) m->pageq.next;
5270 if (hibernate_page_bittst(page_list, m->phys_page))
5271 {
5272 if (m->dirty)
5273 count_discard_purgeable++;
5274 else
5275 count_discard_inactive++;
5276 hibernate_discard_page(m);
5277 }
5278 m = next;
5279 }
5280
5281 m = (vm_page_t) queue_first(&vm_page_queue_active);
5282 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5283 {
5284 next = (vm_page_t) m->pageq.next;
5285 if (hibernate_page_bittst(page_list, m->phys_page))
5286 {
5287 if (m->dirty)
5288 count_discard_purgeable++;
5289 else
5290 count_discard_active++;
5291 hibernate_discard_page(m);
5292 }
5293 m = next;
5294 }
5295
5296 clock_get_uptime(&end);
5297 absolutetime_to_nanoseconds(end - start, &nsec);
5298 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5299 nsec / 1000000ULL,
5300 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5301 }
5302
5303 #endif /* HIBERNATION */
5304
5305 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5306
5307 #include <mach_vm_debug.h>
5308 #if MACH_VM_DEBUG
5309
5310 #include <mach_debug/hash_info.h>
5311 #include <vm/vm_debug.h>
5312
5313 /*
5314 * Routine: vm_page_info
5315 * Purpose:
5316 * Return information about the global VP table.
5317 * Fills the buffer with as much information as possible
5318 * and returns the desired size of the buffer.
5319 * Conditions:
5320 * Nothing locked. The caller should provide
5321 * possibly-pageable memory.
5322 */
5323
5324 unsigned int
5325 vm_page_info(
5326 hash_info_bucket_t *info,
5327 unsigned int count)
5328 {
5329 unsigned int i;
5330 lck_spin_t *bucket_lock;
5331
5332 if (vm_page_bucket_count < count)
5333 count = vm_page_bucket_count;
5334
5335 for (i = 0; i < count; i++) {
5336 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5337 unsigned int bucket_count = 0;
5338 vm_page_t m;
5339
5340 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5341 lck_spin_lock(bucket_lock);
5342
5343 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5344 bucket_count++;
5345
5346 lck_spin_unlock(bucket_lock);
5347
5348 /* don't touch pageable memory while holding locks */
5349 info[i].hib_count = bucket_count;
5350 }
5351
5352 return vm_page_bucket_count;
5353 }
5354 #endif /* MACH_VM_DEBUG */
5355
5356 #include <mach_kdb.h>
5357 #if MACH_KDB
5358
5359 #include <ddb/db_output.h>
5360 #include <vm/vm_print.h>
5361 #define printf kdbprintf
5362
5363 /*
5364 * Routine: vm_page_print [exported]
5365 */
5366 void
5367 vm_page_print(
5368 db_addr_t db_addr)
5369 {
5370 vm_page_t p;
5371
5372 p = (vm_page_t) (long) db_addr;
5373
5374 iprintf("page 0x%x\n", p);
5375
5376 db_indent += 2;
5377
5378 iprintf("object=0x%x", p->object);
5379 printf(", offset=0x%x", p->offset);
5380 printf(", wire_count=%d", p->wire_count);
5381
5382 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5383 (p->local ? "" : "!"),
5384 (p->inactive ? "" : "!"),
5385 (p->active ? "" : "!"),
5386 (p->throttled ? "" : "!"),
5387 (p->gobbled ? "" : "!"),
5388 (p->laundry ? "" : "!"),
5389 (p->free ? "" : "!"),
5390 (p->reference ? "" : "!"),
5391 (p->encrypted ? "" : "!"));
5392 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5393 (p->busy ? "" : "!"),
5394 (p->wanted ? "" : "!"),
5395 (p->tabled ? "" : "!"),
5396 (p->fictitious ? "" : "!"),
5397 (p->private ? "" : "!"),
5398 (p->precious ? "" : "!"));
5399 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5400 (p->absent ? "" : "!"),
5401 (p->error ? "" : "!"),
5402 (p->dirty ? "" : "!"),
5403 (p->cleaning ? "" : "!"),
5404 (p->pageout ? "" : "!"),
5405 (p->clustered ? "" : "!"));
5406 iprintf("%soverwriting, %srestart, %sunusual\n",
5407 (p->overwriting ? "" : "!"),
5408 (p->restart ? "" : "!"),
5409 (p->unusual ? "" : "!"));
5410
5411 iprintf("phys_page=0x%x", p->phys_page);
5412
5413 db_indent -= 2;
5414 }
5415 #endif /* MACH_KDB */