1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <ppc/mappings.h> /* (BRINGUP) */
89 #include <pexpert/pexpert.h> /* (BRINGUP) */
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <sys/kdebug.h>
103
104 boolean_t vm_page_free_verify = TRUE;
105
106 unsigned int vm_lopage_free_count = 0;
107 unsigned int vm_lopage_free_limit = 0;
108 unsigned int vm_lopage_lowater = 0;
109 boolean_t vm_lopage_refill = FALSE;
110 boolean_t vm_lopage_needed = FALSE;
111
112 lck_mtx_ext_t vm_page_queue_lock_ext;
113 lck_mtx_ext_t vm_page_queue_free_lock_ext;
114 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
115
116 int speculative_age_index = 0;
117 int speculative_steal_index = 0;
118 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
119
120
121 __private_extern__ void vm_page_init_lck_grp(void);
122
123 static void vm_page_free_prepare(vm_page_t page);
124
125
126
127 /*
128 * Associated with each page of user-allocatable memory is a
129 * page structure.
130 */
131
132 /*
133 * These variables record the values returned by vm_page_bootstrap,
134 * for debugging purposes. The implementation of pmap_steal_memory
135 * and pmap_startup here also uses them internally.
136 */
137
138 vm_offset_t virtual_space_start;
139 vm_offset_t virtual_space_end;
140 int vm_page_pages;
141
142 /*
143 * The vm_page_lookup() routine, which provides for fast
144 * (virtual memory object, offset) to page lookup, employs
145 * the following hash table. The vm_page_{insert,remove}
146 * routines install and remove associations in the table.
147 * [This table is often called the virtual-to-physical,
148 * or VP, table.]
149 */
150 typedef struct {
151 vm_page_t pages;
152 #if MACH_PAGE_HASH_STATS
153 int cur_count; /* current count */
154 int hi_count; /* high water mark */
155 #endif /* MACH_PAGE_HASH_STATS */
156 } vm_page_bucket_t;
157
158
159 #define BUCKETS_PER_LOCK 16
160
161 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
162 unsigned int vm_page_bucket_count = 0; /* How big is array? */
163 unsigned int vm_page_hash_mask; /* Mask for hash function */
164 unsigned int vm_page_hash_shift; /* Shift for hash function */
165 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
166 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
167
168 lck_spin_t *vm_page_bucket_locks;
169
170
171 #if MACH_PAGE_HASH_STATS
172 /* This routine is only for debug. It is intended to be called by
173 * hand by a developer using a kernel debugger. This routine prints
174 * out vm_page_hash table statistics to the kernel debug console.
175 */
176 void
177 hash_debug(void)
178 {
179 int i;
180 int numbuckets = 0;
181 int highsum = 0;
182 int maxdepth = 0;
183
184 for (i = 0; i < vm_page_bucket_count; i++) {
185 if (vm_page_buckets[i].hi_count) {
186 numbuckets++;
187 highsum += vm_page_buckets[i].hi_count;
188 if (vm_page_buckets[i].hi_count > maxdepth)
189 maxdepth = vm_page_buckets[i].hi_count;
190 }
191 }
192 printf("Total number of buckets: %d\n", vm_page_bucket_count);
193 printf("Number used buckets: %d = %d%%\n",
194 numbuckets, 100*numbuckets/vm_page_bucket_count);
195 printf("Number unused buckets: %d = %d%%\n",
196 vm_page_bucket_count - numbuckets,
197 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
198 printf("Sum of bucket max depth: %d\n", highsum);
199 printf("Average bucket depth: %d.%2d\n",
200 highsum/vm_page_bucket_count,
201 highsum%vm_page_bucket_count);
202 printf("Maximum bucket depth: %d\n", maxdepth);
203 }
204 #endif /* MACH_PAGE_HASH_STATS */
205
206 /*
207 * The virtual page size is currently implemented as a runtime
208 * variable, but is constant once initialized using vm_set_page_size.
209 * This initialization must be done in the machine-dependent
210 * bootstrap sequence, before calling other machine-independent
211 * initializations.
212 *
213 * All references to the virtual page size outside this
214 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
215 * constants.
216 */
217 vm_size_t page_size = PAGE_SIZE;
218 vm_size_t page_mask = PAGE_MASK;
219 int page_shift = PAGE_SHIFT;
220
221 /*
222 * Resident page structures are initialized from
223 * a template (see vm_page_alloc).
224 *
225 * When adding a new field to the resident page
226 * structure, be sure to add initialization
227 * (see vm_page_bootstrap).
228 */
229 struct vm_page vm_page_template;
230
231 vm_page_t vm_pages = VM_PAGE_NULL;
232 unsigned int vm_pages_count = 0;
233 ppnum_t vm_page_lowest = 0;
234
235 /*
236 * Resident pages that represent real memory
237 * are allocated from a set of free lists,
238 * one per color.
239 */
240 unsigned int vm_colors;
241 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
242 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
243 queue_head_t vm_page_queue_free[MAX_COLORS];
244 vm_page_t vm_page_queue_fictitious;
245 unsigned int vm_page_free_wanted;
246 unsigned int vm_page_free_wanted_privileged;
247 unsigned int vm_page_free_count;
248 unsigned int vm_page_fictitious_count;
249
250 unsigned int vm_page_free_count_minimum; /* debugging */
251
252 /*
253 * Occasionally, the virtual memory system uses
254 * resident page structures that do not refer to
255 * real pages, for example to leave a page with
256 * important state information in the VP table.
257 *
258 * These page structures are allocated the way
259 * most other kernel structures are.
260 */
261 zone_t vm_page_zone;
262 vm_locks_array_t vm_page_locks;
263 decl_lck_mtx_data(,vm_page_alloc_lock)
264 unsigned int io_throttle_zero_fill;
265
266 unsigned int vm_page_local_q_count = 0;
267 unsigned int vm_page_local_q_soft_limit = 250;
268 unsigned int vm_page_local_q_hard_limit = 500;
269 struct vplq *vm_page_local_q = NULL;
270
271 /*
272 * Fictitious pages don't have a physical address,
273 * but we must initialize phys_page to something.
274 * For debugging, this should be a strange value
275 * that the pmap module can recognize in assertions.
276 */
277 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
278
279 /*
280 * Guard pages are not accessible so they don't
281 * need a physical address, but we need to enter
282 * one in the pmap.
283 * Let's make it recognizable and make sure that
284 * we don't use a real physical page with that
285 * physical address.
286 */
287 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
288
289 /*
290 * Resident page structures are also chained on
291 * queues that are used by the page replacement
292 * system (pageout daemon). These queues are
293 * defined here, but are shared by the pageout
294 * module. The inactive queue is broken into
295 * inactive and zf for convenience as the
296 * pageout daemon often assigns a higher
297 * affinity to zf pages.
298 */
299 queue_head_t vm_page_queue_active;
300 queue_head_t vm_page_queue_inactive;
301 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
302 queue_head_t vm_page_queue_throttled;
303
304 unsigned int vm_page_active_count;
305 unsigned int vm_page_inactive_count;
306 unsigned int vm_page_throttled_count;
307 unsigned int vm_page_speculative_count;
308 unsigned int vm_page_wire_count;
309 unsigned int vm_page_wire_count_initial;
310 unsigned int vm_page_gobble_count = 0;
311 unsigned int vm_page_wire_count_warning = 0;
312 unsigned int vm_page_gobble_count_warning = 0;
313
314 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
315 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
316 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
317
318 #if DEVELOPMENT || DEBUG
319 unsigned int vm_page_speculative_recreated = 0;
320 unsigned int vm_page_speculative_created = 0;
321 unsigned int vm_page_speculative_used = 0;
322 #endif
323
324 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
325 ppnum_t max_valid_low_ppnum = 0xffffffff;
326
327
328 /*
329 * Several page replacement parameters are also
330 * shared with this module, so that page allocation
331 * (done here in vm_page_alloc) can trigger the
332 * pageout daemon.
333 */
334 unsigned int vm_page_free_target = 0;
335 unsigned int vm_page_free_min = 0;
336 unsigned int vm_page_throttle_limit = 0;
337 uint32_t vm_page_creation_throttle = 0;
338 unsigned int vm_page_inactive_target = 0;
339 unsigned int vm_page_inactive_min = 0;
340 unsigned int vm_page_free_reserved = 0;
341 unsigned int vm_page_throttle_count = 0;
342
343 /*
344 * The VM system has a couple of heuristics for deciding
345 * that pages are "uninteresting" and should be placed
346 * on the inactive queue as likely candidates for replacement.
347 * These variables let the heuristics be controlled at run-time
348 * to make experimentation easier.
349 */
350
351 boolean_t vm_page_deactivate_hint = TRUE;
352
353 struct vm_page_stats_reusable vm_page_stats_reusable;
354
355 /*
356 * vm_set_page_size:
357 *
358 * Sets the page size, perhaps based upon the memory
359 * size. Must be called before any use of page-size
360 * dependent functions.
361 *
362 * Sets page_shift and page_mask from page_size.
363 */
364 void
365 vm_set_page_size(void)
366 {
367 page_mask = page_size - 1;
368
369 if ((page_mask & page_size) != 0)
370 panic("vm_set_page_size: page size not a power of two");
371
372 for (page_shift = 0; ; page_shift++)
373 if ((1U << page_shift) == page_size)
374 break;
375 }
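/*
 * Illustrative note (not part of the original source): with the common
 * PAGE_SIZE of 4096 bytes, the code above yields page_mask = 0xFFF and
 * page_shift = 12, since 1U << 12 == 4096.  A page size that is not a
 * power of two panics before the shift is ever computed.
 */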
376
377
378 /* Called once during startup, once the cache geometry is known.
379 */
380 static void
381 vm_page_set_colors( void )
382 {
383 unsigned int n, override;
384
385 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
386 n = override;
387 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
388 n = vm_cache_geometry_colors;
389 else n = DEFAULT_COLORS; /* use default if all else fails */
390
391 if ( n == 0 )
392 n = 1;
393 if ( n > MAX_COLORS )
394 n = MAX_COLORS;
395
396 /* the count must be a power of 2 */
397 if ( ( n & (n - 1)) != 0 )
398 panic("vm_page_set_colors");
399
400 vm_colors = n;
401 vm_color_mask = n - 1;
402 }
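/*
 * Illustrative note (not part of the original source): if, say, 8 colors
 * are chosen, vm_color_mask becomes 7 and a page's free-list color is
 * simply (phys_page & vm_color_mask), as used by vm_page_release() later
 * in this file; vm_page_grab() cycles through the free queues with the
 * same mask.  Requiring a power-of-two count is what makes that mask
 * operation valid.
 */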
403
404
405 lck_grp_t vm_page_lck_grp_free;
406 lck_grp_t vm_page_lck_grp_queue;
407 lck_grp_t vm_page_lck_grp_local;
408 lck_grp_t vm_page_lck_grp_purge;
409 lck_grp_t vm_page_lck_grp_alloc;
410 lck_grp_t vm_page_lck_grp_bucket;
411 lck_grp_attr_t vm_page_lck_grp_attr;
412 lck_attr_t vm_page_lck_attr;
413
414
415 __private_extern__ void
416 vm_page_init_lck_grp(void)
417 {
418 /*
419 * initialize the vm_page lock world
420 */
421 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
426 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
427 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
428 lck_attr_setdefault(&vm_page_lck_attr);
429 }
430
431 void
432 vm_page_init_local_q()
433 {
434 unsigned int num_cpus;
435 unsigned int i;
436 struct vplq *t_local_q;
437
438 num_cpus = ml_get_max_cpus();
439
440 /*
441 * no point in this for a uni-processor system
442 */
443 if (num_cpus >= 2) {
444 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
445
446 for (i = 0; i < num_cpus; i++) {
447 struct vpl *lq;
448
449 lq = &t_local_q[i].vpl_un.vpl;
450 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
451 queue_init(&lq->vpl_queue);
452 lq->vpl_count = 0;
453 }
454 vm_page_local_q_count = num_cpus;
455
456 vm_page_local_q = (struct vplq *)t_local_q;
457 }
458 }
459
460
461 uint64_t initial_max_mem;
462 int initial_wire_count;
463 int initial_free_count;
464 int initial_lopage_count;
465
466 /*
467 * vm_page_bootstrap:
468 *
469 * Initializes the resident memory module.
470 *
471 * Allocates memory for the page cells, and
472 * for the object/offset-to-page hash table headers.
473 * Each page cell is initialized and placed on the free list.
474 * Returns the range of available kernel virtual memory.
475 */
476
477 void
478 vm_page_bootstrap(
479 vm_offset_t *startp,
480 vm_offset_t *endp)
481 {
482 register vm_page_t m;
483 unsigned int i;
484 unsigned int log1;
485 unsigned int log2;
486 unsigned int size;
487
488 /*
489 * Initialize the vm_page template.
490 */
491
492 m = &vm_page_template;
493 bzero(m, sizeof (*m));
494
495 m->pageq.next = NULL;
496 m->pageq.prev = NULL;
497 m->listq.next = NULL;
498 m->listq.prev = NULL;
499 m->next = VM_PAGE_NULL;
500
501 m->object = VM_OBJECT_NULL; /* reset later */
502 m->offset = (vm_object_offset_t) -1; /* reset later */
503
504 m->wire_count = 0;
505 m->local = FALSE;
506 m->inactive = FALSE;
507 m->active = FALSE;
508 m->pageout_queue = FALSE;
509 m->speculative = FALSE;
510 m->laundry = FALSE;
511 m->free = FALSE;
512 m->reference = FALSE;
513 m->gobbled = FALSE;
514 m->private = FALSE;
515 m->throttled = FALSE;
516 m->__unused_pageq_bits = 0;
517
518 m->phys_page = 0; /* reset later */
519
520 m->busy = TRUE;
521 m->wanted = FALSE;
522 m->tabled = FALSE;
523 m->fictitious = FALSE;
524 m->pmapped = FALSE;
525 m->wpmapped = FALSE;
526 m->pageout = FALSE;
527 m->absent = FALSE;
528 m->error = FALSE;
529 m->dirty = FALSE;
530 m->cleaning = FALSE;
531 m->precious = FALSE;
532 m->clustered = FALSE;
533 m->overwriting = FALSE;
534 m->restart = FALSE;
535 m->unusual = FALSE;
536 m->encrypted = FALSE;
537 m->encrypted_cleaning = FALSE;
538 m->list_req_pending = FALSE;
539 m->dump_cleaning = FALSE;
540 m->cs_validated = FALSE;
541 m->cs_tainted = FALSE;
542 m->no_cache = FALSE;
543 m->zero_fill = FALSE;
544 m->reusable = FALSE;
545 m->__unused_object_bits = 0;
546
547
548 /*
549 * Initialize the page queues.
550 */
551 vm_page_init_lck_grp();
552
553 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
554 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
555 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
556
557 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
558 int group;
559
560 purgeable_queues[i].token_q_head = 0;
561 purgeable_queues[i].token_q_tail = 0;
562 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
563 queue_init(&purgeable_queues[i].objq[group]);
564
565 purgeable_queues[i].type = i;
566 purgeable_queues[i].new_pages = 0;
567 #if MACH_ASSERT
568 purgeable_queues[i].debug_count_tokens = 0;
569 purgeable_queues[i].debug_count_objects = 0;
570 #endif
571 };
572
573 for (i = 0; i < MAX_COLORS; i++ )
574 queue_init(&vm_page_queue_free[i]);
575 queue_init(&vm_lopage_queue_free);
576 vm_page_queue_fictitious = VM_PAGE_NULL;
577 queue_init(&vm_page_queue_active);
578 queue_init(&vm_page_queue_inactive);
579 queue_init(&vm_page_queue_throttled);
580 queue_init(&vm_page_queue_zf);
581
582 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
583 queue_init(&vm_page_queue_speculative[i].age_q);
584
585 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
586 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
587 }
588 vm_page_free_wanted = 0;
589 vm_page_free_wanted_privileged = 0;
590
591 vm_page_set_colors();
592
593
594 /*
595 * Steal memory for the map and zone subsystems.
596 */
597
598 vm_map_steal_memory();
599 zone_steal_memory();
600
601 /*
602 * Allocate (and initialize) the virtual-to-physical
603 * table hash buckets.
604 *
605 * The number of buckets should be a power of two to
606 * get a good hash function. The following computation
607 * chooses the first power of two that is greater
608 * than the number of physical pages in the system.
609 */
610
611 if (vm_page_bucket_count == 0) {
612 unsigned int npages = pmap_free_pages();
613
614 vm_page_bucket_count = 1;
615 while (vm_page_bucket_count < npages)
616 vm_page_bucket_count <<= 1;
617 }
618 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
619
620 vm_page_hash_mask = vm_page_bucket_count - 1;
621
622 /*
623 * Calculate object shift value for hashing algorithm:
624 * O = log2(sizeof(struct vm_object))
625 * B = log2(vm_page_bucket_count)
626 * hash shifts the object left by
627 * B/2 - O
628 */
629 size = vm_page_bucket_count;
630 for (log1 = 0; size > 1; log1++)
631 size /= 2;
632 size = sizeof(struct vm_object);
633 for (log2 = 0; size > 1; log2++)
634 size /= 2;
635 vm_page_hash_shift = log1/2 - log2 + 1;
636
637 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
638 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
639 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
640
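/*
 * Worked example (not part of the original source, sizes hypothetical):
 * if vm_page_bucket_count is 262144 (2^18) then log1 = 18, and if
 * sizeof(struct vm_object) were around 200 bytes then log2 = 7, giving
 * vm_page_hash_shift = 18/2 - 7 + 1 = 3 and
 * vm_page_bucket_hash = (1 << 9) | (1 << 4) | 1 = 0x211.
 */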
641 if (vm_page_hash_mask & vm_page_bucket_count)
642 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
643
644 vm_page_buckets = (vm_page_bucket_t *)
645 pmap_steal_memory(vm_page_bucket_count *
646 sizeof(vm_page_bucket_t));
647
648 vm_page_bucket_locks = (lck_spin_t *)
649 pmap_steal_memory(vm_page_bucket_lock_count *
650 sizeof(lck_spin_t));
651
652 for (i = 0; i < vm_page_bucket_count; i++) {
653 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
654
655 bucket->pages = VM_PAGE_NULL;
656 #if MACH_PAGE_HASH_STATS
657 bucket->cur_count = 0;
658 bucket->hi_count = 0;
659 #endif /* MACH_PAGE_HASH_STATS */
660 }
661
662 for (i = 0; i < vm_page_bucket_lock_count; i++)
663 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
664
665 /*
666 * Machine-dependent code allocates the resident page table.
667 * It uses vm_page_init to initialize the page frames.
668 * The code also returns to us the virtual space available
669 * to the kernel. We don't trust the pmap module
670 * to get the alignment right.
671 */
672
673 pmap_startup(&virtual_space_start, &virtual_space_end);
674 virtual_space_start = round_page(virtual_space_start);
675 virtual_space_end = trunc_page(virtual_space_end);
676
677 *startp = virtual_space_start;
678 *endp = virtual_space_end;
679
680 /*
681 * Compute the initial "wire" count.
682 * Up until now, the pages which have been set aside are not under
683 * the VM system's control, so although they aren't explicitly
684 * wired, they nonetheless can't be moved. At this moment,
685 * all VM managed pages are "free", courtesy of pmap_startup.
686 */
687 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
688 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
689 vm_page_wire_count_initial = vm_page_wire_count;
690 vm_page_free_count_minimum = vm_page_free_count;
691
692 initial_max_mem = max_mem;
693 initial_wire_count = vm_page_wire_count;
694 initial_free_count = vm_page_free_count;
695 initial_lopage_count = vm_lopage_free_count;
696
697 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
698 vm_page_free_count, vm_page_wire_count);
699
700 simple_lock_init(&vm_paging_lock, 0);
701 }
702
703 #ifndef MACHINE_PAGES
704 /*
705 * We implement pmap_steal_memory and pmap_startup with the help
706 * of two simpler functions, pmap_virtual_space and pmap_next_page.
707 */
708
709 void *
710 pmap_steal_memory(
711 vm_size_t size)
712 {
713 vm_offset_t addr, vaddr;
714 ppnum_t phys_page;
715
716 /*
717 * We round the size up to a multiple of the pointer size.
718 */
719
720 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
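/*
 * Illustrative note (not part of the original source): on a 64-bit
 * build sizeof (void *) is 8, so a request for, say, 13 bytes becomes
 * (13 + 7) & ~7 == 16, keeping every stolen chunk pointer-aligned.
 */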
721
722 /*
723 * If this is the first call to pmap_steal_memory,
724 * we have to initialize ourself.
725 */
726
727 if (virtual_space_start == virtual_space_end) {
728 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
729
730 /*
731 * The initial values must be aligned properly, and
732 * we don't trust the pmap module to do it right.
733 */
734
735 virtual_space_start = round_page(virtual_space_start);
736 virtual_space_end = trunc_page(virtual_space_end);
737 }
738
739 /*
740 * Allocate virtual memory for this request.
741 */
742
743 addr = virtual_space_start;
744 virtual_space_start += size;
745
746 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
747
748 /*
749 * Allocate and map physical pages to back new virtual pages.
750 */
751
752 for (vaddr = round_page(addr);
753 vaddr < addr + size;
754 vaddr += PAGE_SIZE) {
755
756 if (!pmap_next_page_hi(&phys_page))
757 panic("pmap_steal_memory");
758
759 /*
760 * XXX Logically, these mappings should be wired,
761 * but some pmap modules barf if they are.
762 */
763 #if defined(__LP64__)
764 pmap_pre_expand(kernel_pmap, vaddr);
765 #endif
766
767 pmap_enter(kernel_pmap, vaddr, phys_page,
768 VM_PROT_READ|VM_PROT_WRITE,
769 VM_WIMG_USE_DEFAULT, FALSE);
770 /*
771 * Account for newly stolen memory
772 */
773 vm_page_wire_count++;
774
775 }
776
777 return (void *) addr;
778 }
779
780 void
781 pmap_startup(
782 vm_offset_t *startp,
783 vm_offset_t *endp)
784 {
785 unsigned int i, npages, pages_initialized, fill, fillval;
786 ppnum_t phys_page;
787 addr64_t tmpaddr;
788
789 /*
790 * We calculate how many page frames we will have
791 * and then allocate the page structures in one chunk.
792 */
793
794 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
795 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
796 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages we can manage, leaving room for their vm_page_ts as well */
797
798 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
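/*
 * Worked example (not part of the original source, struct size hypothetical):
 * each managed frame costs PAGE_SIZE bytes of usable memory plus one
 * struct vm_page to describe it.  With 4 KB pages and an 80-byte
 * struct vm_page, 1 GB of remaining memory yields roughly
 * 1073741824 / (4096 + 80) ~= 257122 entries, a little less than the
 * 262144 raw frames that 1 GB contains.
 */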
799
800 /*
801 * Initialize the page frames.
802 */
803 for (i = 0, pages_initialized = 0; i < npages; i++) {
804 if (!pmap_next_page(&phys_page))
805 break;
806 if (pages_initialized == 0 || phys_page < vm_page_lowest)
807 vm_page_lowest = phys_page;
808
809 vm_page_init(&vm_pages[i], phys_page, FALSE);
810 vm_page_pages++;
811 pages_initialized++;
812 }
813 vm_pages_count = pages_initialized;
814
815 /*
816 * Check if we want to initialize pages to a known value
817 */
818 fill = 0; /* Assume no fill */
819 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
820
821 // -debug code remove
822 if (2 == vm_himemory_mode) {
823 // free low -> high so high is preferred
824 for (i = 1; i <= pages_initialized; i++) {
825 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
826 vm_page_release(&vm_pages[i - 1]);
827 }
828 }
829 else
830 // debug code remove-
831
832 /*
833 * Release pages in reverse order so that physical pages
834 * initially get allocated in ascending addresses. This keeps
835 * the devices (which must address physical memory) happy if
836 * they require several consecutive pages.
837 */
838 for (i = pages_initialized; i > 0; i--) {
839 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
840 vm_page_release(&vm_pages[i - 1]);
841 }
842
843 #if 0
844 {
845 vm_page_t xx, xxo, xxl;
846 int i, j, k, l;
847
848 j = 0; /* (BRINGUP) */
849 xxl = 0;
850
851 for( i = 0; i < vm_colors; i++ ) {
852 queue_iterate(&vm_page_queue_free[i],
853 xx,
854 vm_page_t,
855 pageq) { /* BRINGUP */
856 j++; /* (BRINGUP) */
857 if(j > vm_page_free_count) { /* (BRINGUP) */
858 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
859 }
860
861 l = vm_page_free_count - j; /* (BRINGUP) */
862 k = 0; /* (BRINGUP) */
863
864 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
865
866 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
867 k++;
868 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
869 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
870 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
871 }
872 }
873
874 xxl = xx;
875 }
876 }
877
878 if(j != vm_page_free_count) { /* (BRINGUP) */
879 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
880 }
881 }
882 #endif
883
884
885 /*
886 * We have to re-align virtual_space_start,
887 * because pmap_steal_memory has been using it.
888 */
889
890 virtual_space_start = round_page(virtual_space_start);
891
892 *startp = virtual_space_start;
893 *endp = virtual_space_end;
894 }
895 #endif /* MACHINE_PAGES */
896
897 /*
898 * Routine: vm_page_module_init
899 * Purpose:
900 * Second initialization pass, to be done after
901 * the basic VM system is ready.
902 */
903 void
904 vm_page_module_init(void)
905 {
906 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
907 0, PAGE_SIZE, "vm pages");
908
909 #if ZONE_DEBUG
910 zone_debug_disable(vm_page_zone);
911 #endif /* ZONE_DEBUG */
912
913 zone_change(vm_page_zone, Z_EXPAND, FALSE);
914 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
915 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
916
917 /*
918 * Adjust zone statistics to account for the real pages allocated
919 * in vm_page_create(). [Q: is this really what we want?]
920 */
921 vm_page_zone->count += vm_page_pages;
922 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
923
924 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
925 }
926
927 /*
928 * Routine: vm_page_create
929 * Purpose:
930 * After the VM system is up, machine-dependent code
931 * may stumble across more physical memory. For example,
932 * memory that it was reserving for a frame buffer.
933 * vm_page_create turns this memory into available pages.
934 */
935
936 void
937 vm_page_create(
938 ppnum_t start,
939 ppnum_t end)
940 {
941 ppnum_t phys_page;
942 vm_page_t m;
943
944 for (phys_page = start;
945 phys_page < end;
946 phys_page++) {
947 while ((m = (vm_page_t) vm_page_grab_fictitious())
948 == VM_PAGE_NULL)
949 vm_page_more_fictitious();
950
951 vm_page_init(m, phys_page, FALSE);
952 pmap_clear_noencrypt(phys_page);
953 vm_page_pages++;
954 vm_page_release(m);
955 }
956 }
957
958 /*
959 * vm_page_hash:
960 *
961 * Distributes the object/offset key pair among hash buckets.
962 *
963 * NOTE: The bucket count must be a power of 2
964 */
965 #define vm_page_hash(object, offset) (\
966 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
967 & vm_page_hash_mask)
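/*
 * Descriptive note (not part of the original source): vm_page_insert_internal(),
 * vm_page_replace(), vm_page_remove() and vm_page_lookup() below all compute
 * hash_id with this macro and then take
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK], so a single spin lock
 * covers BUCKETS_PER_LOCK (16) consecutive buckets.
 */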
968
969
970 /*
971 * vm_page_insert: [ internal use only ]
972 *
973 * Inserts the given mem entry into the object/object-page
974 * table and object list.
975 *
976 * The object must be locked.
977 */
978 void
979 vm_page_insert(
980 vm_page_t mem,
981 vm_object_t object,
982 vm_object_offset_t offset)
983 {
984 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
985 }
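/*
 * Illustrative usage sketch (not part of the original source; a typical
 * caller, error handling omitted):
 *
 *	mem = vm_page_grab();			/* may return VM_PAGE_NULL */
 *	vm_object_lock(object);			/* exclusive lock required */
 *	vm_page_insert(mem, object, offset);	/* enter into hash + memq */
 *	vm_object_unlock(object);
 *
 * vm_page_insert_internal() below asserts the exclusive object lock and
 * that no page is already resident at the given offset.
 */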
986
987 void
988 vm_page_insert_internal(
989 vm_page_t mem,
990 vm_object_t object,
991 vm_object_offset_t offset,
992 boolean_t queues_lock_held,
993 boolean_t insert_in_hash)
994 {
995 vm_page_bucket_t *bucket;
996 lck_spin_t *bucket_lock;
997 int hash_id;
998
999 XPR(XPR_VM_PAGE,
1000 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1001 object, offset, mem, 0,0);
1002
1003 VM_PAGE_CHECK(mem);
1004
1005 if (object == vm_submap_object) {
1006 /* the vm_submap_object is only a placeholder for submaps */
1007 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1008 }
1009
1010 vm_object_lock_assert_exclusive(object);
1011 #if DEBUG
1012 lck_mtx_assert(&vm_page_queue_lock,
1013 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1014 : LCK_MTX_ASSERT_NOTOWNED);
1015 #endif /* DEBUG */
1016
1017 if (insert_in_hash == TRUE) {
1018 #if DEBUG
1019 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1020 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1021 "already in (obj=%p,off=0x%llx)",
1022 mem, object, offset, mem->object, mem->offset);
1023 #endif
1024 assert(!object->internal || offset < object->size);
1025
1026 /* only insert "pageout" pages into "pageout" objects,
1027 * and normal pages into normal objects */
1028 assert(object->pageout == mem->pageout);
1029
1030 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1031
1032 /*
1033 * Record the object/offset pair in this page
1034 */
1035
1036 mem->object = object;
1037 mem->offset = offset;
1038
1039 /*
1040 * Insert it into the object/offset hash table
1041 */
1042 hash_id = vm_page_hash(object, offset);
1043 bucket = &vm_page_buckets[hash_id];
1044 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1045
1046 lck_spin_lock(bucket_lock);
1047
1048 mem->next = bucket->pages;
1049 bucket->pages = mem;
1050 #if MACH_PAGE_HASH_STATS
1051 if (++bucket->cur_count > bucket->hi_count)
1052 bucket->hi_count = bucket->cur_count;
1053 #endif /* MACH_PAGE_HASH_STATS */
1054
1055 lck_spin_unlock(bucket_lock);
1056 }
1057 /*
1058 * Now link into the object's list of backed pages.
1059 */
1060
1061 VM_PAGE_INSERT(mem, object);
1062 mem->tabled = TRUE;
1063
1064 /*
1065 * Show that the object has one more resident page.
1066 */
1067
1068 object->resident_page_count++;
1069 if (VM_PAGE_WIRED(mem)) {
1070 object->wired_page_count++;
1071 }
1072 assert(object->resident_page_count >= object->wired_page_count);
1073
1074 assert(!mem->reusable);
1075
1076 if (object->purgable == VM_PURGABLE_VOLATILE) {
1077 if (VM_PAGE_WIRED(mem)) {
1078 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1079 } else {
1080 OSAddAtomic(1, &vm_page_purgeable_count);
1081 }
1082 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1083 mem->throttled) {
1084 /*
1085 * This page belongs to a purged VM object but hasn't
1086 * been purged (because it was "busy").
1087 * It's in the "throttled" queue and hence not
1088 * visible to vm_pageout_scan(). Move it to a pageable
1089 * queue, so that it can eventually be reclaimed, instead
1090 * of lingering in the "empty" object.
1091 */
1092 if (queues_lock_held == FALSE)
1093 vm_page_lockspin_queues();
1094 vm_page_deactivate(mem);
1095 if (queues_lock_held == FALSE)
1096 vm_page_unlock_queues();
1097 }
1098 }
1099
1100 /*
1101 * vm_page_replace:
1102 *
1103 * Exactly like vm_page_insert, except that we first
1104 * remove any existing page at the given offset in object.
1105 *
1106 * The object must be locked.
1107 */
1108 void
1109 vm_page_replace(
1110 register vm_page_t mem,
1111 register vm_object_t object,
1112 register vm_object_offset_t offset)
1113 {
1114 vm_page_bucket_t *bucket;
1115 vm_page_t found_m = VM_PAGE_NULL;
1116 lck_spin_t *bucket_lock;
1117 int hash_id;
1118
1119 VM_PAGE_CHECK(mem);
1120 vm_object_lock_assert_exclusive(object);
1121 #if DEBUG
1122 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1123 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1124 "already in (obj=%p,off=0x%llx)",
1125 mem, object, offset, mem->object, mem->offset);
1126 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1127 #endif
1128 /*
1129 * Record the object/offset pair in this page
1130 */
1131
1132 mem->object = object;
1133 mem->offset = offset;
1134
1135 /*
1136 * Insert it into the object/offset hash table,
1137 * replacing any page that might have been there.
1138 */
1139
1140 hash_id = vm_page_hash(object, offset);
1141 bucket = &vm_page_buckets[hash_id];
1142 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1143
1144 lck_spin_lock(bucket_lock);
1145
1146 if (bucket->pages) {
1147 vm_page_t *mp = &bucket->pages;
1148 vm_page_t m = *mp;
1149
1150 do {
1151 if (m->object == object && m->offset == offset) {
1152 /*
1153 * Remove old page from hash list
1154 */
1155 *mp = m->next;
1156
1157 found_m = m;
1158 break;
1159 }
1160 mp = &m->next;
1161 } while ((m = *mp));
1162
1163 mem->next = bucket->pages;
1164 } else {
1165 mem->next = VM_PAGE_NULL;
1166 }
1167 /*
1168 * insert new page at head of hash list
1169 */
1170 bucket->pages = mem;
1171
1172 lck_spin_unlock(bucket_lock);
1173
1174 if (found_m) {
1175 /*
1176 * there was already a page at the specified
1177 * offset for this object... remove it from
1178 * the object and free it back to the free list
1179 */
1180 vm_page_free_unlocked(found_m, FALSE);
1181 }
1182 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1183 }
1184
1185 /*
1186 * vm_page_remove: [ internal use only ]
1187 *
1188 * Removes the given mem entry from the object/offset-page
1189 * table and the object page list.
1190 *
1191 * The object must be locked.
1192 */
1193
1194 void
1195 vm_page_remove(
1196 vm_page_t mem,
1197 boolean_t remove_from_hash)
1198 {
1199 vm_page_bucket_t *bucket;
1200 vm_page_t this;
1201 lck_spin_t *bucket_lock;
1202 int hash_id;
1203
1204 XPR(XPR_VM_PAGE,
1205 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1206 mem->object, mem->offset,
1207 mem, 0,0);
1208
1209 vm_object_lock_assert_exclusive(mem->object);
1210 assert(mem->tabled);
1211 assert(!mem->cleaning);
1212 VM_PAGE_CHECK(mem);
1213
1214 if (remove_from_hash == TRUE) {
1215 /*
1216 * Remove from the object/offset hash table
1217 */
1218 hash_id = vm_page_hash(mem->object, mem->offset);
1219 bucket = &vm_page_buckets[hash_id];
1220 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1221
1222 lck_spin_lock(bucket_lock);
1223
1224 if ((this = bucket->pages) == mem) {
1225 /* optimize for common case */
1226
1227 bucket->pages = mem->next;
1228 } else {
1229 vm_page_t *prev;
1230
1231 for (prev = &this->next;
1232 (this = *prev) != mem;
1233 prev = &this->next)
1234 continue;
1235 *prev = this->next;
1236 }
1237 #if MACH_PAGE_HASH_STATS
1238 bucket->cur_count--;
1239 #endif /* MACH_PAGE_HASH_STATS */
1240
1241 lck_spin_unlock(bucket_lock);
1242 }
1243 /*
1244 * Now remove from the object's list of backed pages.
1245 */
1246
1247 VM_PAGE_REMOVE(mem);
1248
1249 /*
1250 * And show that the object has one fewer resident
1251 * page.
1252 */
1253
1254 assert(mem->object->resident_page_count > 0);
1255 mem->object->resident_page_count--;
1256 if (VM_PAGE_WIRED(mem)) {
1257 assert(mem->object->wired_page_count > 0);
1258 mem->object->wired_page_count--;
1259 }
1260 assert(mem->object->resident_page_count >=
1261 mem->object->wired_page_count);
1262 if (mem->reusable) {
1263 assert(mem->object->reusable_page_count > 0);
1264 mem->object->reusable_page_count--;
1265 assert(mem->object->reusable_page_count <=
1266 mem->object->resident_page_count);
1267 mem->reusable = FALSE;
1268 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1269 vm_page_stats_reusable.reused_remove++;
1270 } else if (mem->object->all_reusable) {
1271 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1272 vm_page_stats_reusable.reused_remove++;
1273 }
1274
1275 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1276 if (VM_PAGE_WIRED(mem)) {
1277 assert(vm_page_purgeable_wired_count > 0);
1278 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1279 } else {
1280 assert(vm_page_purgeable_count > 0);
1281 OSAddAtomic(-1, &vm_page_purgeable_count);
1282 }
1283 }
1284 mem->tabled = FALSE;
1285 mem->object = VM_OBJECT_NULL;
1286 mem->offset = (vm_object_offset_t) -1;
1287 }
1288
1289
1290 /*
1291 * vm_page_lookup:
1292 *
1293 * Returns the page associated with the object/offset
1294 * pair specified; if none is found, VM_PAGE_NULL is returned.
1295 *
1296 * The object must be locked. No side effects.
1297 */
1298
1299 unsigned long vm_page_lookup_hint = 0;
1300 unsigned long vm_page_lookup_hint_next = 0;
1301 unsigned long vm_page_lookup_hint_prev = 0;
1302 unsigned long vm_page_lookup_hint_miss = 0;
1303 unsigned long vm_page_lookup_bucket_NULL = 0;
1304 unsigned long vm_page_lookup_miss = 0;
1305
1306
1307 vm_page_t
1308 vm_page_lookup(
1309 vm_object_t object,
1310 vm_object_offset_t offset)
1311 {
1312 vm_page_t mem;
1313 vm_page_bucket_t *bucket;
1314 queue_entry_t qe;
1315 lck_spin_t *bucket_lock;
1316 int hash_id;
1317
1318 vm_object_lock_assert_held(object);
1319 mem = object->memq_hint;
1320
1321 if (mem != VM_PAGE_NULL) {
1322 assert(mem->object == object);
1323
1324 if (mem->offset == offset) {
1325 vm_page_lookup_hint++;
1326 return mem;
1327 }
1328 qe = queue_next(&mem->listq);
1329
1330 if (! queue_end(&object->memq, qe)) {
1331 vm_page_t next_page;
1332
1333 next_page = (vm_page_t) qe;
1334 assert(next_page->object == object);
1335
1336 if (next_page->offset == offset) {
1337 vm_page_lookup_hint_next++;
1338 object->memq_hint = next_page; /* new hint */
1339 return next_page;
1340 }
1341 }
1342 qe = queue_prev(&mem->listq);
1343
1344 if (! queue_end(&object->memq, qe)) {
1345 vm_page_t prev_page;
1346
1347 prev_page = (vm_page_t) qe;
1348 assert(prev_page->object == object);
1349
1350 if (prev_page->offset == offset) {
1351 vm_page_lookup_hint_prev++;
1352 object->memq_hint = prev_page; /* new hint */
1353 return prev_page;
1354 }
1355 }
1356 }
1357 /*
1358 * Search the hash table for this object/offset pair
1359 */
1360 hash_id = vm_page_hash(object, offset);
1361 bucket = &vm_page_buckets[hash_id];
1362
1363 /*
1364 * since we hold the object lock, we are guaranteed that no
1365 * new pages can be inserted into this object... this in turn
1366 * guarantees that the page we're looking for can't exist
1367 * if the bucket it hashes to is currently NULL even when looked
1368 * at outside the scope of the hash bucket lock... this is a
1369 * really cheap optimization to avoid taking the lock
1370 */
1371 if (bucket->pages == VM_PAGE_NULL) {
1372 vm_page_lookup_bucket_NULL++;
1373
1374 return (VM_PAGE_NULL);
1375 }
1376 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1377
1378 lck_spin_lock(bucket_lock);
1379
1380 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1381 VM_PAGE_CHECK(mem);
1382 if ((mem->object == object) && (mem->offset == offset))
1383 break;
1384 }
1385 lck_spin_unlock(bucket_lock);
1386
1387 if (mem != VM_PAGE_NULL) {
1388 if (object->memq_hint != VM_PAGE_NULL) {
1389 vm_page_lookup_hint_miss++;
1390 }
1391 assert(mem->object == object);
1392 object->memq_hint = mem;
1393 } else
1394 vm_page_lookup_miss++;
1395
1396 return(mem);
1397 }
1398
1399
1400 /*
1401 * vm_page_rename:
1402 *
1403 * Move the given memory entry from its
1404 * current object to the specified target object/offset.
1405 *
1406 * The object must be locked.
1407 */
1408 void
1409 vm_page_rename(
1410 register vm_page_t mem,
1411 register vm_object_t new_object,
1412 vm_object_offset_t new_offset,
1413 boolean_t encrypted_ok)
1414 {
1415 assert(mem->object != new_object);
1416
1417 /*
1418 * ENCRYPTED SWAP:
1419 * The encryption key is based on the page's memory object
1420 * (aka "pager") and paging offset. Moving the page to
1421 * another VM object changes its "pager" and "paging_offset"
1422 * so it has to be decrypted first, or we would lose the key.
1423 *
1424 * One exception is VM object collapsing, where we transfer pages
1425 * from one backing object to its parent object. This operation also
1426 * transfers the paging information, so the <pager,paging_offset> info
1427 * should remain consistent. The caller (vm_object_do_collapse())
1428 * sets "encrypted_ok" in this case.
1429 */
1430 if (!encrypted_ok && mem->encrypted) {
1431 panic("vm_page_rename: page %p is encrypted\n", mem);
1432 }
1433
1434 XPR(XPR_VM_PAGE,
1435 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1436 new_object, new_offset,
1437 mem, 0,0);
1438
1439 /*
1440 * Changes to mem->object require the page lock because
1441 * the pageout daemon uses that lock to get the object.
1442 */
1443 vm_page_lockspin_queues();
1444
1445 vm_page_remove(mem, TRUE);
1446 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1447
1448 vm_page_unlock_queues();
1449 }
1450
1451 /*
1452 * vm_page_init:
1453 *
1454 * Initialize the fields in a new page.
1455 * This takes a structure with random values and initializes it
1456 * so that it can be given to vm_page_release or vm_page_insert.
1457 */
1458 void
1459 vm_page_init(
1460 vm_page_t mem,
1461 ppnum_t phys_page,
1462 boolean_t lopage)
1463 {
1464 assert(phys_page);
1465
1466 *mem = vm_page_template;
1467 mem->phys_page = phys_page;
1468 mem->lopage = lopage;
1469 }
1470
1471 /*
1472 * vm_page_grab_fictitious:
1473 *
1474 * Remove a fictitious page from the free list.
1475 * Returns VM_PAGE_NULL if there are no free pages.
1476 */
1477 int c_vm_page_grab_fictitious = 0;
1478 int c_vm_page_release_fictitious = 0;
1479 int c_vm_page_more_fictitious = 0;
1480
1481 extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
1482
1483 vm_page_t
1484 vm_page_grab_fictitious_common(
1485 ppnum_t phys_addr)
1486 {
1487 register vm_page_t m;
1488
1489 m = (vm_page_t)zget(vm_page_zone);
1490 if (m) {
1491 vm_page_init(m, phys_addr, FALSE);
1492 m->fictitious = TRUE;
1493 }
1494
1495 c_vm_page_grab_fictitious++;
1496 return m;
1497 }
1498
1499 vm_page_t
1500 vm_page_grab_fictitious(void)
1501 {
1502 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1503 }
1504
1505 vm_page_t
1506 vm_page_grab_guard(void)
1507 {
1508 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1509 }
1510
1511 /*
1512 * vm_page_release_fictitious:
1513 *
1514 * Release a fictitious page to the free list.
1515 */
1516
1517 void
1518 vm_page_release_fictitious(
1519 register vm_page_t m)
1520 {
1521 assert(!m->free);
1522 assert(m->busy);
1523 assert(m->fictitious);
1524 assert(m->phys_page == vm_page_fictitious_addr ||
1525 m->phys_page == vm_page_guard_addr);
1526
1527 c_vm_page_release_fictitious++;
1528 #if DEBUG
1529 if (m->free)
1530 panic("vm_page_release_fictitious");
1531 #endif
1532 m->free = TRUE;
1533 zfree(vm_page_zone, m);
1534 }
1535
1536 /*
1537 * vm_page_more_fictitious:
1538 *
1539 * Add more fictitious pages to the free list.
1540 * Allowed to block. This routine is way intimate
1541 * with the zones code, for several reasons:
1542 * 1. we need to carve some page structures out of physical
1543 * memory before zones work, so they _cannot_ come from
1544 * the zone_map.
1545 * 2. the zone needs to be collectable in order to prevent
1546 * growth without bound. These structures are used by
1547 * the device pager (by the hundreds and thousands), as
1548 * private pages for pageout, and as blocking pages for
1549 * pagein. Temporary bursts in demand should not result in
1550 * permanent allocation of a resource.
1551 * 3. To smooth allocation humps, we allocate single pages
1552 * with kernel_memory_allocate(), and cram them into the
1553 * zone. This also allows us to initialize the vm_page_t's
1554 * on the way into the zone, so that zget() always returns
1555 * an initialized structure. The zone free element pointer
1556 * and the free page pointer are both the first item in the
1557 * vm_page_t.
1558 * 4. By having the pages in the zone pre-initialized, we need
1559 * not keep 2 levels of lists. The garbage collector simply
1560 * scans our list, and reduces physical memory usage as it
1561 * sees fit.
1562 */
1563
1564 void vm_page_more_fictitious(void)
1565 {
1566 register vm_page_t m;
1567 vm_offset_t addr;
1568 kern_return_t retval;
1569 int i;
1570
1571 c_vm_page_more_fictitious++;
1572
1573 /*
1574 * Allocate a single page from the zone_map. Do not wait if no physical
1575 * pages are immediately available, and do not zero the space. We need
1576 * our own blocking lock here to prevent having multiple,
1577 * simultaneous requests from piling up on the zone_map lock. Exactly
1578 * one (of our) threads should be potentially waiting on the map lock.
1579 * If winner is not vm-privileged, then the page allocation will fail,
1580 * and it will temporarily block here in the vm_page_wait().
1581 */
1582 lck_mtx_lock(&vm_page_alloc_lock);
1583 /*
1584 * If another thread allocated space, just bail out now.
1585 */
1586 if (zone_free_count(vm_page_zone) > 5) {
1587 /*
1588 * The number "5" is a small number that is larger than the
1589 * number of fictitious pages that any single caller will
1590 * attempt to allocate. Otherwise, a thread will attempt to
1591 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1592 * release all of the resources and locks already acquired,
1593 * and then call this routine. This routine finds the pages
1594 * that the caller released, so fails to allocate new space.
1595 * The process repeats infinitely. The largest known number
1596 * of fictitious pages required in this manner is 2. 5 is
1597 * simply a somewhat larger number.
1598 */
1599 lck_mtx_unlock(&vm_page_alloc_lock);
1600 return;
1601 }
1602
1603 retval = kernel_memory_allocate(zone_map,
1604 &addr, PAGE_SIZE, VM_PROT_ALL,
1605 KMA_KOBJECT|KMA_NOPAGEWAIT);
1606 if (retval != KERN_SUCCESS) {
1607 /*
1608 * No page was available. Tell the pageout daemon, drop the
1609 * lock to give another thread a chance at it, and
1610 * wait for the pageout daemon to make progress.
1611 */
1612 lck_mtx_unlock(&vm_page_alloc_lock);
1613 vm_page_wait(THREAD_UNINT);
1614 return;
1615 }
1616 /*
1617 * Initialize as many vm_page_t's as will fit on this page. This
1618 * depends on the zone code disturbing ONLY the first item of
1619 * each zone element.
1620 */
1621 m = (vm_page_t)addr;
1622 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1623 vm_page_init(m, vm_page_fictitious_addr, FALSE);
1624 m->fictitious = TRUE;
1625 m++;
1626 }
1627 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1628 lck_mtx_unlock(&vm_page_alloc_lock);
1629 }
1630
1631
1632 /*
1633 * vm_pool_low():
1634 *
1635 * Return true if it is not likely that a non-vm_privileged thread
1636 * can get memory without blocking. Advisory only, since the
1637 * situation may change under us.
1638 */
1639 int
1640 vm_pool_low(void)
1641 {
1642 /* No locking, at worst we will fib. */
1643 return( vm_page_free_count <= vm_page_free_reserved );
1644 }
1645
1646
1647
1648 /*
1649 * this is an interface to support bring-up of drivers
1650 * on platforms with physical memory > 4G...
1651 */
1652 int vm_himemory_mode = 0;
1653
1654
1655 /*
1656 * this interface exists to support hardware controllers
1657 * incapable of generating DMAs with more than 32 bits
1658 * of address on platforms with physical memory > 4G...
1659 */
1660 unsigned int vm_lopages_allocated_q = 0;
1661 unsigned int vm_lopages_allocated_cpm_success = 0;
1662 unsigned int vm_lopages_allocated_cpm_failed = 0;
1663 queue_head_t vm_lopage_queue_free;
1664
1665 vm_page_t
1666 vm_page_grablo(void)
1667 {
1668 vm_page_t mem;
1669
1670 if (vm_lopage_needed == FALSE)
1671 return (vm_page_grab());
1672
1673 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1674
1675 if ( !queue_empty(&vm_lopage_queue_free)) {
1676 queue_remove_first(&vm_lopage_queue_free,
1677 mem,
1678 vm_page_t,
1679 pageq);
1680 assert(vm_lopage_free_count);
1681
1682 vm_lopage_free_count--;
1683 vm_lopages_allocated_q++;
1684
1685 if (vm_lopage_free_count < vm_lopage_lowater)
1686 vm_lopage_refill = TRUE;
1687
1688 lck_mtx_unlock(&vm_page_queue_free_lock);
1689 } else {
1690 lck_mtx_unlock(&vm_page_queue_free_lock);
1691
1692 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1693
1694 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1695 vm_lopages_allocated_cpm_failed++;
1696 lck_mtx_unlock(&vm_page_queue_free_lock);
1697
1698 return (VM_PAGE_NULL);
1699 }
1700 mem->busy = TRUE;
1701
1702 vm_page_lockspin_queues();
1703
1704 mem->gobbled = FALSE;
1705 vm_page_gobble_count--;
1706 vm_page_wire_count--;
1707
1708 vm_lopages_allocated_cpm_success++;
1709 vm_page_unlock_queues();
1710 }
1711 assert(!mem->gobbled);
1712 assert(mem->busy);
1713 assert(!mem->free);
1714 assert(!mem->pmapped);
1715 assert(!mem->wpmapped);
1716
1717 mem->pageq.next = NULL;
1718 mem->pageq.prev = NULL;
1719
1720 return (mem);
1721 }
1722
1723 /*
1724 * vm_page_grab:
1725 *
1726 * first try to grab a page from the per-cpu free list...
1727 * this must be done while pre-emption is disabled... if
1728 * a page is available, we're done...
1729 * if no page is available, grab the vm_page_queue_free_lock
1730 * and see if current number of free pages would allow us
1731 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1732 * if there are pages available, disable preemption and
1733 * recheck the state of the per-cpu free list... we could
1734 * have been preempted and moved to a different cpu, or
1735 * some other thread could have re-filled it... if still
1736 * empty, figure out how many pages we can steal from the
1737 * global free queue and move to the per-cpu queue...
1738 * return 1 of these pages when done... only wakeup the
1739 * pageout_scan thread if we moved pages from the global
1740 * list... no need for the wakeup if we've satisfied the
1741 * request from the per-cpu queue.
1742 */
1743
1744 #define COLOR_GROUPS_TO_STEAL 4
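/*
 * Illustrative note (not part of the original source): with, say, 8 colors
 * the refill path below normally steals 4 * 8 = 32 pages from the global
 * free queues for the per-cpu list, clipped so the global pool never dips
 * below vm_page_free_reserved; at or below the reserve only one page is
 * taken per call.
 */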
1745
1746
1747 vm_page_t
1748 vm_page_grab( void )
1749 {
1750 vm_page_t mem;
1751
1752
1753 disable_preemption();
1754
1755 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1756 return_page_from_cpu_list:
1757 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1758 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1759 mem->pageq.next = NULL;
1760
1761 enable_preemption();
1762
1763 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1764 assert(mem->tabled == FALSE);
1765 assert(mem->object == VM_OBJECT_NULL);
1766 assert(!mem->laundry);
1767 assert(!mem->free);
1768 assert(pmap_verify_free(mem->phys_page));
1769 assert(mem->busy);
1770 assert(!mem->encrypted);
1771 assert(!mem->pmapped);
1772 assert(!mem->wpmapped);
1773
1774 return mem;
1775 }
1776 enable_preemption();
1777
1778
1779 /*
1780 * Optionally produce warnings if the wire or gobble
1781 * counts exceed some threshold.
1782 */
1783 if (vm_page_wire_count_warning > 0
1784 && vm_page_wire_count >= vm_page_wire_count_warning) {
1785 printf("mk: vm_page_grab(): high wired page count of %d\n",
1786 vm_page_wire_count);
1787 assert(vm_page_wire_count < vm_page_wire_count_warning);
1788 }
1789 if (vm_page_gobble_count_warning > 0
1790 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1791 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1792 vm_page_gobble_count);
1793 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1794 }
1795
1796 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1797
1798 /*
1799 * Only let privileged threads (involved in pageout)
1800 * dip into the reserved pool.
1801 */
1802 if ((vm_page_free_count < vm_page_free_reserved) &&
1803 !(current_thread()->options & TH_OPT_VMPRIV)) {
1804 lck_mtx_unlock(&vm_page_queue_free_lock);
1805 mem = VM_PAGE_NULL;
1806 }
1807 else {
1808 vm_page_t head;
1809 vm_page_t tail;
1810 unsigned int pages_to_steal;
1811 unsigned int color;
1812
1813 while ( vm_page_free_count == 0 ) {
1814
1815 lck_mtx_unlock(&vm_page_queue_free_lock);
1816 /*
1817 * must be a privileged thread to be
1818 * in this state since a non-privileged
1819 * thread would have bailed if we were
1820 * under the vm_page_free_reserved mark
1821 */
1822 VM_PAGE_WAIT();
1823 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1824 }
1825
1826 disable_preemption();
1827
1828 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1829 lck_mtx_unlock(&vm_page_queue_free_lock);
1830
1831 /*
1832 * we got preempted and moved to another processor
1833 * or we got preempted and someone else ran and filled the cache
1834 */
1835 goto return_page_from_cpu_list;
1836 }
1837 if (vm_page_free_count <= vm_page_free_reserved)
1838 pages_to_steal = 1;
1839 else {
1840 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1841
1842 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1843 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1844 }
1845 color = PROCESSOR_DATA(current_processor(), start_color);
1846 head = tail = NULL;
1847
1848 while (pages_to_steal--) {
1849 if (--vm_page_free_count < vm_page_free_count_minimum)
1850 vm_page_free_count_minimum = vm_page_free_count;
1851
1852 while (queue_empty(&vm_page_queue_free[color]))
1853 color = (color + 1) & vm_color_mask;
1854
1855 queue_remove_first(&vm_page_queue_free[color],
1856 mem,
1857 vm_page_t,
1858 pageq);
1859 mem->pageq.next = NULL;
1860 mem->pageq.prev = NULL;
1861
1862 color = (color + 1) & vm_color_mask;
1863
1864 if (head == NULL)
1865 head = mem;
1866 else
1867 tail->pageq.next = (queue_t)mem;
1868 tail = mem;
1869
1870 mem->pageq.prev = NULL;
1871 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1872 assert(mem->tabled == FALSE);
1873 assert(mem->object == VM_OBJECT_NULL);
1874 assert(!mem->laundry);
1875 assert(mem->free);
1876 mem->free = FALSE;
1877
1878 assert(pmap_verify_free(mem->phys_page));
1879 assert(mem->busy);
1880 assert(!mem->free);
1881 assert(!mem->encrypted);
1882 assert(!mem->pmapped);
1883 assert(!mem->wpmapped);
1884 }
1885 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1886 PROCESSOR_DATA(current_processor(), start_color) = color;
1887
1888 /*
1889 * satisfy this request
1890 */
1891 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1892 mem = head;
1893 mem->pageq.next = NULL;
1894
1895 lck_mtx_unlock(&vm_page_queue_free_lock);
1896
1897 enable_preemption();
1898 }
1899 /*
1900 * Decide if we should poke the pageout daemon.
1901 * We do this if the free count is less than the low
1902 * water mark, or if the free count is less than the high
1903 * water mark (but above the low water mark) and the inactive
1904 * count is less than its target.
1905 *
1906 * We don't have the counts locked ... if they change a little,
1907 * it doesn't really matter.
1908 */
1909 if ((vm_page_free_count < vm_page_free_min) ||
1910 ((vm_page_free_count < vm_page_free_target) &&
1911 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1912 thread_wakeup((event_t) &vm_page_free_wanted);
1913
1914 #if CONFIG_EMBEDDED
1915 {
1916 int percent_avail;
1917
1918 /*
1919 * Decide if we need to poke the memorystatus notification thread.
1920 */
1921 percent_avail =
1922 (vm_page_active_count + vm_page_inactive_count +
1923 vm_page_speculative_count + vm_page_free_count +
1924 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1925 atop_64(max_mem);
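/*
 * Worked example (illustrative values only): with max_mem = 512MB, i.e.
 * atop_64(max_mem) = 131072 4K pages, and 26214 pages counted as
 * available above, percent_avail = 26214 * 100 / 131072 = 19 (integer
 * division), so the memorystatus thread is poked only if
 * kern_memorystatus_level was previously 24 or higher.
 */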
1926 if (percent_avail <= (kern_memorystatus_level - 5)) {
1927 kern_memorystatus_level = percent_avail;
1928 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1929 }
1930 }
1931 #endif
1932
1933 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1934
1935 return mem;
1936 }
1937
1938 /*
1939 * vm_page_release:
1940 *
1941 * Return a page to the free list.
1942 */
1943
1944 void
1945 vm_page_release(
1946 register vm_page_t mem)
1947 {
1948 unsigned int color;
1949 int need_wakeup = 0;
1950 int need_priv_wakeup = 0;
1951 #if 0
1952 unsigned int pindex;
1953 phys_entry *physent;
1954
1955 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1956 if(physent->ppLink & ppN) { /* (BRINGUP) */
1957 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1958 }
1959 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1960 #endif
1961 assert(!mem->private && !mem->fictitious);
1962 if (vm_page_free_verify) {
1963 assert(pmap_verify_free(mem->phys_page));
1964 }
1965 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1966
1967
1968 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1969 #if DEBUG
1970 if (mem->free)
1971 panic("vm_page_release");
1972 #endif
1973 assert(mem->busy);
1974 assert(!mem->laundry);
1975 assert(mem->object == VM_OBJECT_NULL);
1976 assert(mem->pageq.next == NULL &&
1977 mem->pageq.prev == NULL);
1978 assert(mem->listq.next == NULL &&
1979 mem->listq.prev == NULL);
1980
1981 if ((mem->lopage || vm_lopage_refill == TRUE) &&
1982 vm_lopage_free_count < vm_lopage_free_limit &&
1983 mem->phys_page < max_valid_low_ppnum) {
1984 /*
1985 * this exists to support hardware controllers
1986 * incapable of generating DMAs with more than 32 bits
1987 * of address on platforms with physical memory > 4G...
1988 */
1989 queue_enter_first(&vm_lopage_queue_free,
1990 mem,
1991 vm_page_t,
1992 pageq);
1993 vm_lopage_free_count++;
1994
1995 if (vm_lopage_free_count >= vm_lopage_free_limit)
1996 vm_lopage_refill = FALSE;
1997
1998 mem->lopage = TRUE;
1999 } else {
2000 mem->lopage = FALSE;
2001 mem->free = TRUE;
2002
2003 color = mem->phys_page & vm_color_mask;
2004 queue_enter_first(&vm_page_queue_free[color],
2005 mem,
2006 vm_page_t,
2007 pageq);
2008 vm_page_free_count++;
2009 /*
2010 * Check if we should wake up someone waiting for a page.
2011 * But don't bother waking them unless they can allocate.
2012 *
2013 * We wake up only one thread, to prevent starvation.
2014 * Because the scheduling system handles wait queues FIFO,
2015 * if we wake up all waiting threads, one greedy thread
2016 * can starve multiple well-behaved threads. When the threads
2017 * all wake up, the greedy thread runs first, grabs the page,
2018 * and waits for another page. It will be the first to run
2019 * when the next page is freed.
2020 *
2021 * However, there is a slight danger here.
2022 * The thread we wake might not use the free page.
2023 * Then the other threads could wait indefinitely
2024 * while the page goes unused. To forestall this,
2025 * the pageout daemon will keep making free pages
2026 * as long as vm_page_free_wanted is non-zero.
2027 */
2028
2029 assert(vm_page_free_count > 0);
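/*
 * Privileged (pageout) waiters get first claim on this page; ordinary
 * waiters are only woken once the free count is back above the reserve.
 */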
2030 if (vm_page_free_wanted_privileged > 0) {
2031 vm_page_free_wanted_privileged--;
2032 need_priv_wakeup = 1;
2033 } else if (vm_page_free_wanted > 0 &&
2034 vm_page_free_count > vm_page_free_reserved) {
2035 vm_page_free_wanted--;
2036 need_wakeup = 1;
2037 }
2038 }
2039 lck_mtx_unlock(&vm_page_queue_free_lock);
2040
2041 if (need_priv_wakeup)
2042 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2043 else if (need_wakeup)
2044 thread_wakeup_one((event_t) &vm_page_free_count);
2045
2046 #if CONFIG_EMBEDDED
2047 {
2048 int percent_avail;
2049
2050 /*
2051 * Decide if we need to poke the memorystatus notification thread.
2052 * Locking is not a big issue, as only a single thread delivers these.
2053 */
2054 percent_avail =
2055 (vm_page_active_count + vm_page_inactive_count +
2056 vm_page_speculative_count + vm_page_free_count +
2057 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2058 atop_64(max_mem);
2059 if (percent_avail >= (kern_memorystatus_level + 5)) {
2060 kern_memorystatus_level = percent_avail;
2061 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2062 }
2063 }
2064 #endif
2065 }
2066
2067 /*
2068 * vm_page_wait:
2069 *
2070 * Wait for a page to become available.
2071 * If there are plenty of free pages, then we don't sleep.
2072 *
2073 * Returns:
2074 * TRUE: There may be another page, try again
2075 * FALSE: We were interrupted out of our wait, don't try again
2076 */
2077
2078 boolean_t
2079 vm_page_wait(
2080 int interruptible )
2081 {
2082 /*
2083 * We can't use vm_page_free_reserved to make this
2084 * determination. Consider: some thread might
2085 * need to allocate two pages. The first allocation
2086 * succeeds, the second fails. After the first page is freed,
2087 * a call to vm_page_wait must really block.
2088 */
2089 kern_return_t wait_result;
2090 int need_wakeup = 0;
2091 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2092
2093 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2094
2095 if (is_privileged && vm_page_free_count) {
2096 lck_mtx_unlock(&vm_page_queue_free_lock);
2097 return TRUE;
2098 }
2099 if (vm_page_free_count < vm_page_free_target) {
2100
2101 if (is_privileged) {
2102 if (vm_page_free_wanted_privileged++ == 0)
2103 need_wakeup = 1;
2104 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2105 } else {
2106 if (vm_page_free_wanted++ == 0)
2107 need_wakeup = 1;
2108 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2109 }
2110 lck_mtx_unlock(&vm_page_queue_free_lock);
2111 counter(c_vm_page_wait_block++);
2112
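/*
 * This was the first thread to queue up for a free page, so poke the
 * pageout daemon (which sleeps on &vm_page_free_wanted) to start
 * producing free pages.
 */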
2113 if (need_wakeup)
2114 thread_wakeup((event_t)&vm_page_free_wanted);
2115
2116 if (wait_result == THREAD_WAITING)
2117 wait_result = thread_block(THREAD_CONTINUE_NULL);
2118
2119 return(wait_result == THREAD_AWAKENED);
2120 } else {
2121 lck_mtx_unlock(&vm_page_queue_free_lock);
2122 return TRUE;
2123 }
2124 }
2125
2126 /*
2127 * vm_page_alloc:
2128 *
2129 * Allocate and return a memory cell associated
2130 * with this VM object/offset pair.
2131 *
2132 * Object must be locked.
2133 */
2134
2135 vm_page_t
2136 vm_page_alloc(
2137 vm_object_t object,
2138 vm_object_offset_t offset)
2139 {
2140 register vm_page_t mem;
2141
2142 vm_object_lock_assert_exclusive(object);
2143 mem = vm_page_grab();
2144 if (mem == VM_PAGE_NULL)
2145 return VM_PAGE_NULL;
2146
2147 vm_page_insert(mem, object, offset);
2148
2149 return(mem);
2150 }
2151
2152 vm_page_t
2153 vm_page_alloclo(
2154 vm_object_t object,
2155 vm_object_offset_t offset)
2156 {
2157 register vm_page_t mem;
2158
2159 vm_object_lock_assert_exclusive(object);
2160 mem = vm_page_grablo();
2161 if (mem == VM_PAGE_NULL)
2162 return VM_PAGE_NULL;
2163
2164 vm_page_insert(mem, object, offset);
2165
2166 return(mem);
2167 }
2168
2169
2170 /*
2171 * vm_page_alloc_guard:
2172 *
2173 * Allocate a fictitious page which will be used
2174 * as a guard page. The page will be inserted into
2175 * the object and returned to the caller.
2176 */
2177
2178 vm_page_t
2179 vm_page_alloc_guard(
2180 vm_object_t object,
2181 vm_object_offset_t offset)
2182 {
2183 register vm_page_t mem;
2184
2185 vm_object_lock_assert_exclusive(object);
2186 mem = vm_page_grab_guard();
2187 if (mem == VM_PAGE_NULL)
2188 return VM_PAGE_NULL;
2189
2190 vm_page_insert(mem, object, offset);
2191
2192 return(mem);
2193 }
2194
2195
2196 counter(unsigned int c_laundry_pages_freed = 0;)
2197
2198 /*
2199 * vm_page_free:
2200 *
2201 * Returns the given page to the free list,
2202 * disassociating it from any VM object.
2203 *
2204 * Object and page queues must be locked prior to entry.
2205 */
2206 static void
2207 vm_page_free_prepare(
2208 register vm_page_t mem)
2209 {
2210 vm_page_free_prepare_queues(mem);
2211 vm_page_free_prepare_object(mem, TRUE);
2212 }
2213
2214
2215 void
2216 vm_page_free_prepare_queues(
2217 vm_page_t mem)
2218 {
2219 VM_PAGE_CHECK(mem);
2220 assert(!mem->free);
2221 assert(!mem->cleaning);
2222 assert(!mem->pageout);
2223 #if DEBUG
2224 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2225 if (mem->free)
2226 panic("vm_page_free: freeing page on free list\n");
2227 #endif
2228 if (mem->object) {
2229 vm_object_lock_assert_exclusive(mem->object);
2230 }
2231
2232 if (mem->laundry) {
2233 /*
2234 * We may have to free a page while it's being laundered
2235 * if we lost its pager (due to a forced unmount, for example).
2236 * We need to call vm_pageout_throttle_up() before removing
2237 * the page from its VM object, so that we can find out on
2238 * which pageout queue the page is on.
2239 */
2240 vm_pageout_throttle_up(mem);
2241 counter(++c_laundry_pages_freed);
2242 }
2243 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2244
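/*
 * Undo the wired/gobbled accounting: a wired page drops its object's
 * wired_page_count and the global vm_page_wire_count; a gobbled page was
 * counted as wired when it was gobbled, so drop both the wire and gobble
 * counts for it.
 */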
2245 if (VM_PAGE_WIRED(mem)) {
2246 if (mem->object) {
2247 assert(mem->object->wired_page_count > 0);
2248 mem->object->wired_page_count--;
2249 assert(mem->object->resident_page_count >=
2250 mem->object->wired_page_count);
2251 }
2252 if (!mem->private && !mem->fictitious)
2253 vm_page_wire_count--;
2254 mem->wire_count = 0;
2255 assert(!mem->gobbled);
2256 } else if (mem->gobbled) {
2257 if (!mem->private && !mem->fictitious)
2258 vm_page_wire_count--;
2259 vm_page_gobble_count--;
2260 }
2261 }
2262
2263
2264 void
2265 vm_page_free_prepare_object(
2266 vm_page_t mem,
2267 boolean_t remove_from_hash)
2268 {
2269 if (mem->object) {
2270 vm_object_lock_assert_exclusive(mem->object);
2271 }
2272
2273 if (mem->tabled)
2274 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2275
2276 PAGE_WAKEUP(mem); /* clears wanted */
2277
2278 if (mem->private) {
2279 mem->private = FALSE;
2280 mem->fictitious = TRUE;
2281 mem->phys_page = vm_page_fictitious_addr;
2282 }
2283 if (mem->fictitious) {
2284 /* Some of these may be unnecessary */
2285 mem->gobbled = FALSE;
2286 mem->busy = TRUE;
2287 mem->absent = FALSE;
2288 mem->error = FALSE;
2289 mem->dirty = FALSE;
2290 mem->precious = FALSE;
2291 mem->reference = FALSE;
2292 mem->encrypted = FALSE;
2293 mem->encrypted_cleaning = FALSE;
2294 mem->pmapped = FALSE;
2295 mem->wpmapped = FALSE;
2296 mem->reusable = FALSE;
2297 } else {
2298 if (mem->zero_fill == TRUE)
2299 VM_ZF_COUNT_DECR();
2300 vm_page_init(mem, mem->phys_page, mem->lopage);
2301 }
2302 }
2303
2304
2305 void
2306 vm_page_free(
2307 vm_page_t mem)
2308 {
2309 vm_page_free_prepare(mem);
2310 if (mem->fictitious) {
2311 vm_page_release_fictitious(mem);
2312 } else {
2313 vm_page_release(mem);
2314 }
2315 }
2316
2317
2318 void
2319 vm_page_free_unlocked(
2320 vm_page_t mem,
2321 boolean_t remove_from_hash)
2322 {
2323 vm_page_lockspin_queues();
2324 vm_page_free_prepare_queues(mem);
2325 vm_page_unlock_queues();
2326
2327 vm_page_free_prepare_object(mem, remove_from_hash);
2328
2329 if (mem->fictitious) {
2330 vm_page_release_fictitious(mem);
2331 } else {
2332 vm_page_release(mem);
2333 }
2334 }
2335
2336 /*
2337 * Free a list of pages. The list can be up to several hundred pages,
2338 * as blocked up by vm_pageout_scan().
2339 * The big win is not having to take the free list lock once
2340 * per page. We sort the incoming pages into n lists, one for
2341 * each color.
2342 */
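/*
 * Worked example (illustrative values): assuming vm_colors = 8, so
 * vm_color_mask = 7, a page with phys_page 0x12345 is bucketed into
 * free_list[0x12345 & 7] = free_list[5]. Each non-empty bucket is later
 * spliced onto the matching global free queue while holding
 * vm_page_queue_free_lock just once for the whole list.
 */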
2343 void
2344 vm_page_free_list(
2345 vm_page_t mem,
2346 boolean_t prepare_object)
2347 {
2348 vm_page_t nxt;
2349 int pg_count = 0;
2350 int color;
2351 int inuse_list_head = -1;
2352
2353 queue_head_t free_list[MAX_COLORS];
2354 int inuse[MAX_COLORS];
2355
2356 for (color = 0; color < (signed) vm_colors; color++) {
2357 queue_init(&free_list[color]);
2358 }
2359
2360 while (mem) {
2361 assert(!mem->inactive);
2362 assert(!mem->active);
2363 assert(!mem->throttled);
2364 assert(!mem->free);
2365 assert(!mem->speculative);
2366 assert(!VM_PAGE_WIRED(mem));
2367 assert(mem->pageq.prev == NULL);
2368
2369 nxt = (vm_page_t)(mem->pageq.next);
2370
2371 if (prepare_object == TRUE)
2372 vm_page_free_prepare_object(mem, TRUE);
2373
2374 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2375 assert(pmap_verify_free(mem->phys_page));
2376 }
2377 assert(mem->busy);
2378
2379 if (!mem->fictitious) {
2380 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2381 vm_lopage_free_count < vm_lopage_free_limit &&
2382 mem->phys_page < max_valid_low_ppnum) {
2383 mem->pageq.next = NULL;
2384 vm_page_release(mem);
2385 } else {
2386
2387 /*
2388 * IMPORTANT: we can't set the page "free" here
2389 * because that would make the page eligible for
2390 * a physically-contiguous allocation (see
2391 * vm_page_find_contiguous()) right away (we don't
2392 * hold the vm_page_queue_free lock). That would
2393 * cause trouble because the page is not actually
2394 * in the free queue yet...
2395 */
2396 color = mem->phys_page & vm_color_mask;
2397 if (queue_empty(&free_list[color])) {
2398 inuse[color] = inuse_list_head;
2399 inuse_list_head = color;
2400 }
2401 queue_enter_first(&free_list[color],
2402 mem,
2403 vm_page_t,
2404 pageq);
2405 pg_count++;
2406 }
2407 } else {
2408 assert(mem->phys_page == vm_page_fictitious_addr ||
2409 mem->phys_page == vm_page_guard_addr);
2410 vm_page_release_fictitious(mem);
2411 }
2412 mem = nxt;
2413 }
2414 if (pg_count) {
2415 unsigned int avail_free_count;
2416 unsigned int need_wakeup = 0;
2417 unsigned int need_priv_wakeup = 0;
2418
2419 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2420
2421 color = inuse_list_head;
2422
2423 while( color != -1 ) {
2424 vm_page_t first, last;
2425 vm_page_t first_free;
2426
2427 /*
2428 * Now that we hold the vm_page_queue_free lock,
2429 * it's safe to mark all pages in our local queue
2430 * as "free"...
2431 */
2432 queue_iterate(&free_list[color],
2433 mem,
2434 vm_page_t,
2435 pageq) {
2436 assert(!mem->free);
2437 assert(mem->busy);
2438 mem->free = TRUE;
2439 }
2440
2441 /*
2442 * ... and insert our local queue at the head of
2443 * the global free queue.
2444 */
2445 first = (vm_page_t) queue_first(&free_list[color]);
2446 last = (vm_page_t) queue_last(&free_list[color]);
2447 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2448 if (queue_empty(&vm_page_queue_free[color])) {
2449 queue_last(&vm_page_queue_free[color]) =
2450 (queue_entry_t) last;
2451 } else {
2452 queue_prev(&first_free->pageq) =
2453 (queue_entry_t) last;
2454 }
2455 queue_first(&vm_page_queue_free[color]) =
2456 (queue_entry_t) first;
2457 queue_prev(&first->pageq) =
2458 (queue_entry_t) &vm_page_queue_free[color];
2459 queue_next(&last->pageq) =
2460 (queue_entry_t) first_free;
2461
2462 /* next color */
2463 color = inuse[color];
2464 }
2465
2466 vm_page_free_count += pg_count;
2467 avail_free_count = vm_page_free_count;
2468
2469 if (vm_page_free_wanted_privileged > 0 &&
2470 avail_free_count > 0) {
2471 if (avail_free_count < vm_page_free_wanted_privileged) {
2472 need_priv_wakeup = avail_free_count;
2473 vm_page_free_wanted_privileged -=
2474 avail_free_count;
2475 avail_free_count = 0;
2476 } else {
2477 need_priv_wakeup = vm_page_free_wanted_privileged;
2478 vm_page_free_wanted_privileged = 0;
2479 avail_free_count -=
2480 need_priv_wakeup;
2481 }
2482 }
2483
2484 if (vm_page_free_wanted > 0 &&
2485 avail_free_count > vm_page_free_reserved) {
2486 unsigned int available_pages;
2487
2488 available_pages = (avail_free_count -
2489 vm_page_free_reserved);
2490
2491 if (available_pages >= vm_page_free_wanted) {
2492 need_wakeup = vm_page_free_wanted;
2493 vm_page_free_wanted = 0;
2494 } else {
2495 need_wakeup = available_pages;
2496 vm_page_free_wanted -= available_pages;
2497 }
2498 }
2499 lck_mtx_unlock(&vm_page_queue_free_lock);
2500
2501 if (need_priv_wakeup != 0) {
2502 /*
2503 * There shouldn't be that many VM-privileged threads,
2504 * so let's wake them all up, even if we don't quite
2505 * have enough pages to satisfy them all.
2506 */
2507 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2508 }
2509 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2510 /*
2511 * We don't expect to have any more waiters
2512 * after this, so let's wake them all up at
2513 * once.
2514 */
2515 thread_wakeup((event_t) &vm_page_free_count);
2516 } else for (; need_wakeup != 0; need_wakeup--) {
2517 /*
2518 * Wake up one waiter per page we just released.
2519 */
2520 thread_wakeup_one((event_t) &vm_page_free_count);
2521 }
2522 #if CONFIG_EMBEDDED
2523 {
2524 int percent_avail;
2525
2526 /*
2527 * Decide if we need to poke the memorystatus notification thread.
2528 */
2529 percent_avail =
2530 (vm_page_active_count + vm_page_inactive_count +
2531 vm_page_speculative_count + vm_page_free_count +
2532 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2533 atop_64(max_mem);
2534 if (percent_avail >= (kern_memorystatus_level + 5)) {
2535 kern_memorystatus_level = percent_avail;
2536 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2537 }
2538 }
2539 #endif
2540 }
2541 }
2542
2543
2544 /*
2545 * vm_page_wire:
2546 *
2547 * Mark this page as wired down by yet
2548 * another map, removing it from paging queues
2549 * as necessary.
2550 *
2551 * The page's object and the page queues must be locked.
2552 */
2553 void
2554 vm_page_wire(
2555 register vm_page_t mem)
2556 {
2557
2558 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2559
2560 VM_PAGE_CHECK(mem);
2561 if (mem->object) {
2562 vm_object_lock_assert_exclusive(mem->object);
2563 } else {
2564 /*
2565 * In theory, the page should be in an object before it
2566 * gets wired, since we need to hold the object lock
2567 * to update some fields in the page structure.
2568 * However, some code (i386 pmap, for example) might want
2569 * to wire a page before it gets inserted into an object.
2570 * That's somewhat OK, as long as nobody else can get to
2571 * that page and update it at the same time.
2572 */
2573 }
2574 #if DEBUG
2575 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2576 #endif
2577 if ( !VM_PAGE_WIRED(mem)) {
2578 VM_PAGE_QUEUES_REMOVE(mem);
2579
2580 if (mem->object) {
2581 mem->object->wired_page_count++;
2582 assert(mem->object->resident_page_count >=
2583 mem->object->wired_page_count);
2584 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2585 assert(vm_page_purgeable_count > 0);
2586 OSAddAtomic(-1, &vm_page_purgeable_count);
2587 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2588 }
2589 if (mem->object->all_reusable) {
2590 /*
2591 * Wired pages are not counted as "re-usable"
2592 * in "all_reusable" VM objects, so nothing
2593 * to do here.
2594 */
2595 } else if (mem->reusable) {
2596 /*
2597 * This page is not "re-usable" when it's
2598 * wired, so adjust its state and the
2599 * accounting.
2600 */
2601 vm_object_reuse_pages(mem->object,
2602 mem->offset,
2603 mem->offset+PAGE_SIZE_64,
2604 FALSE);
2605 }
2606 }
2607 assert(!mem->reusable);
2608
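/*
 * A gobbled page was already counted in vm_page_wire_count when it was
 * gobbled, so don't count it again here; just convert it from gobbled
 * to wired.
 */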
2609 if (!mem->private && !mem->fictitious && !mem->gobbled)
2610 vm_page_wire_count++;
2611 if (mem->gobbled)
2612 vm_page_gobble_count--;
2613 mem->gobbled = FALSE;
2614 if (mem->zero_fill == TRUE) {
2615 mem->zero_fill = FALSE;
2616 VM_ZF_COUNT_DECR();
2617 }
2618 #if CONFIG_EMBEDDED
2619 {
2620 int percent_avail;
2621
2622 /*
2623 * Decide if we need to poke the memorystatus notification thread.
2624 */
2625 percent_avail =
2626 (vm_page_active_count + vm_page_inactive_count +
2627 vm_page_speculative_count + vm_page_free_count +
2628 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2629 atop_64(max_mem);
2630 if (percent_avail <= (kern_memorystatus_level - 5)) {
2631 kern_memorystatus_level = percent_avail;
2632 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2633 }
2634 }
2635 #endif
2636 /*
2637 * ENCRYPTED SWAP:
2638 * The page could be encrypted, but
2639 * We don't have to decrypt it here
2640 * because we don't guarantee that the
2641 * data is actually valid at this point.
2642 * The page will get decrypted in
2643 * vm_fault_wire() if needed.
2644 */
2645 }
2646 assert(!mem->gobbled);
2647 mem->wire_count++;
2648 VM_PAGE_CHECK(mem);
2649 }
2650
2651 /*
2652 * vm_page_gobble:
2653 *
2654 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2655 *
2656 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2657 */
2658 void
2659 vm_page_gobble(
2660 register vm_page_t mem)
2661 {
2662 vm_page_lockspin_queues();
2663 VM_PAGE_CHECK(mem);
2664
2665 assert(!mem->gobbled);
2666 assert( !VM_PAGE_WIRED(mem));
2667
2668 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2669 if (!mem->private && !mem->fictitious)
2670 vm_page_wire_count++;
2671 }
2672 vm_page_gobble_count++;
2673 mem->gobbled = TRUE;
2674 vm_page_unlock_queues();
2675 }
2676
2677 /*
2678 * vm_page_unwire:
2679 *
2680 * Release one wiring of this page, potentially
2681 * enabling it to be paged again.
2682 *
2683 * The page's object and the page queues must be locked.
2684 */
2685 void
2686 vm_page_unwire(
2687 vm_page_t mem,
2688 boolean_t queueit)
2689 {
2690
2691 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2692
2693 VM_PAGE_CHECK(mem);
2694 assert(VM_PAGE_WIRED(mem));
2695 assert(mem->object != VM_OBJECT_NULL);
2696 #if DEBUG
2697 vm_object_lock_assert_exclusive(mem->object);
2698 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2699 #endif
2700 if (--mem->wire_count == 0) {
2701 assert(!mem->private && !mem->fictitious);
2702 vm_page_wire_count--;
2703 assert(mem->object->wired_page_count > 0);
2704 mem->object->wired_page_count--;
2705 assert(mem->object->resident_page_count >=
2706 mem->object->wired_page_count);
2707 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2708 OSAddAtomic(+1, &vm_page_purgeable_count);
2709 assert(vm_page_purgeable_wired_count > 0);
2710 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2711 }
2712 assert(!mem->laundry);
2713 assert(mem->object != kernel_object);
2714 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2715
2716 if (queueit == TRUE) {
2717 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2718 vm_page_deactivate(mem);
2719 } else {
2720 vm_page_activate(mem);
2721 }
2722 }
2723 #if CONFIG_EMBEDDED
2724 {
2725 int percent_avail;
2726
2727 /*
2728 * Decide if we need to poke the memorystatus notification thread.
2729 */
2730 percent_avail =
2731 (vm_page_active_count + vm_page_inactive_count +
2732 vm_page_speculative_count + vm_page_free_count +
2733 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2734 atop_64(max_mem);
2735 if (percent_avail >= (kern_memorystatus_level + 5)) {
2736 kern_memorystatus_level = percent_avail;
2737 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2738 }
2739 }
2740 #endif
2741 }
2742 VM_PAGE_CHECK(mem);
2743 }
2744
2745 /*
2746 * vm_page_deactivate:
2747 *
2748 * Returns the given page to the inactive list,
2749 * indicating that no physical maps have access
2750 * to this page. [Used by the physical mapping system.]
2751 *
2752 * The page queues must be locked.
2753 */
2754 void
2755 vm_page_deactivate(
2756 vm_page_t m)
2757 {
2758 vm_page_deactivate_internal(m, TRUE);
2759 }
2760
2761
2762 void
2763 vm_page_deactivate_internal(
2764 vm_page_t m,
2765 boolean_t clear_hw_reference)
2766 {
2767
2768 VM_PAGE_CHECK(m);
2769 assert(m->object != kernel_object);
2770 assert(m->phys_page != vm_page_guard_addr);
2771
2772 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2773 #if DEBUG
2774 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2775 #endif
2776 /*
2777 * This page is no longer very interesting. If it was
2778 * interesting (active or inactive/referenced), then we
2779 * clear the reference bit and (re)enter it in the
2780 * inactive queue. Note wired pages should not have
2781 * their reference bit cleared.
2782 */
2783
2784 if (m->absent && !m->unusual)
2785 panic("vm_page_deactivate: %p absent", m);
2786
2787 if (m->gobbled) { /* can this happen? */
2788 assert( !VM_PAGE_WIRED(m));
2789
2790 if (!m->private && !m->fictitious)
2791 vm_page_wire_count--;
2792 vm_page_gobble_count--;
2793 m->gobbled = FALSE;
2794 }
2795 if (m->private || (VM_PAGE_WIRED(m)))
2796 return;
2797
2798 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2799 pmap_clear_reference(m->phys_page);
2800
2801 m->reference = FALSE;
2802 m->no_cache = FALSE;
2803
2804 if (!m->inactive) {
2805 VM_PAGE_QUEUES_REMOVE(m);
2806
2807 assert(!m->laundry);
2808 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2809
2810 if (!IP_VALID(memory_manager_default) &&
2811 m->dirty && m->object->internal &&
2812 (m->object->purgable == VM_PURGABLE_DENY ||
2813 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2814 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2815 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2816 m->throttled = TRUE;
2817 vm_page_throttled_count++;
2818 } else {
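/*
 * A page of a named object with only one reference goes back onto the
 * speculative queue rather than the inactive queue (tracked as
 * vm_page_speculative_recreated on DEVELOPMENT/DEBUG kernels).
 */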
2819 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2820 vm_page_speculate(m, FALSE);
2821 #if DEVELOPMENT || DEBUG
2822 vm_page_speculative_recreated++;
2823 #endif
2824 return;
2825 } else {
2826 if (m->zero_fill) {
2827 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2828 vm_zf_queue_count++;
2829 } else {
2830 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2831 }
2832 }
2833 m->inactive = TRUE;
2834 if (!m->fictitious) {
2835 vm_page_inactive_count++;
2836 token_new_pagecount++;
2837 }
2838 }
2839 }
2840 }
2841
2842 /*
2843 * vm_page_activate:
2844 *
2845 * Put the specified page on the active list (if appropriate).
2846 *
2847 * The page queues must be locked.
2848 */
2849
2850 void
2851 vm_page_activate(
2852 register vm_page_t m)
2853 {
2854 VM_PAGE_CHECK(m);
2855 #ifdef FIXME_4778297
2856 assert(m->object != kernel_object);
2857 #endif
2858 assert(m->phys_page != vm_page_guard_addr);
2859 #if DEBUG
2860 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2861 #endif
2862
2863 if (m->absent && !m->unusual)
2864 panic("vm_page_activate: %p absent", m);
2865
2866 if (m->gobbled) {
2867 assert( !VM_PAGE_WIRED(m));
2868 if (!m->private && !m->fictitious)
2869 vm_page_wire_count--;
2870 vm_page_gobble_count--;
2871 m->gobbled = FALSE;
2872 }
2873 if (m->private)
2874 return;
2875
2876 #if DEBUG
2877 if (m->active)
2878 panic("vm_page_activate: already active");
2879 #endif
2880
2881 if (m->speculative) {
2882 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2883 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2884 }
2885
2886 VM_PAGE_QUEUES_REMOVE(m);
2887
2888 if ( !VM_PAGE_WIRED(m)) {
2889 assert(!m->laundry);
2890 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2891 if (!IP_VALID(memory_manager_default) &&
2892 !m->fictitious && m->dirty && m->object->internal &&
2893 (m->object->purgable == VM_PURGABLE_DENY ||
2894 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2895 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2896 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2897 m->throttled = TRUE;
2898 vm_page_throttled_count++;
2899 } else {
2900 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2901 m->active = TRUE;
2902 if (!m->fictitious)
2903 vm_page_active_count++;
2904 }
2905 m->reference = TRUE;
2906 m->no_cache = FALSE;
2907 }
2908 VM_PAGE_CHECK(m);
2909 }
2910
2911
2912 /*
2913 * vm_page_speculate:
2914 *
2915 * Put the specified page on the speculative list (if appropriate).
2916 *
2917 * The page queues must be locked.
2918 */
2919 void
2920 vm_page_speculate(
2921 vm_page_t m,
2922 boolean_t new)
2923 {
2924 struct vm_speculative_age_q *aq;
2925
2926 VM_PAGE_CHECK(m);
2927 assert(m->object != kernel_object);
2928 assert(m->phys_page != vm_page_guard_addr);
2929 #if DEBUG
2930 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2931 #endif
2932
2933 if (m->absent && !m->unusual)
2934 panic("vm_page_speculate: %p absent", m);
2935
2936 VM_PAGE_QUEUES_REMOVE(m);
2937
2938 if ( !VM_PAGE_WIRED(m)) {
2939 mach_timespec_t ts;
2940 clock_sec_t sec;
2941 clock_nsec_t nsec;
2942
2943 clock_get_system_nanotime(&sec, &nsec);
2944 ts.tv_sec = (unsigned int) sec;
2945 ts.tv_nsec = nsec;
2946
2947 if (vm_page_speculative_count == 0) {
2948
2949 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2950 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2951
2952 aq = &vm_page_queue_speculative[speculative_age_index];
2953
2954 /*
2955 * set the timer to begin a new group
2956 */
2957 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2958 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2959
2960 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2961 } else {
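/*
 * Speculative pages already exist, so there is a current age bin.
 * If its deadline has passed, advance to the next bin (wrapping around),
 * push the steal index out of the way if it collides, flush any pages
 * still sitting in the new bin to the aged queue, and restart the bin's
 * timer.
 */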
2962 aq = &vm_page_queue_speculative[speculative_age_index];
2963
2964 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2965
2966 speculative_age_index++;
2967
2968 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2969 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2970 if (speculative_age_index == speculative_steal_index) {
2971 speculative_steal_index = speculative_age_index + 1;
2972
2973 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2974 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2975 }
2976 aq = &vm_page_queue_speculative[speculative_age_index];
2977
2978 if (!queue_empty(&aq->age_q))
2979 vm_page_speculate_ageit(aq);
2980
2981 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2982 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2983
2984 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2985 }
2986 }
2987 enqueue_tail(&aq->age_q, &m->pageq);
2988 m->speculative = TRUE;
2989 vm_page_speculative_count++;
2990
2991 if (new == TRUE) {
2992 m->object->pages_created++;
2993 #if DEVELOPMENT || DEBUG
2994 vm_page_speculative_created++;
2995 #endif
2996 }
2997 }
2998 VM_PAGE_CHECK(m);
2999 }
3000
3001
3002 /*
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3005 *
3006 * The page queues must be locked.
3007 */
3008 void
3009 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010 {
3011 struct vm_speculative_age_q *sq;
3012 vm_page_t t;
3013
3014 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
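/*
 * Append the aging bin's pages to the tail of the aged queue: if the
 * aged queue is empty, adopt the bin's list wholesale; otherwise link
 * the bin's first page after the aged queue's current last page and
 * patch up the head/tail pointers.
 */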
3016 if (queue_empty(&sq->age_q)) {
3017 sq->age_q.next = aq->age_q.next;
3018 sq->age_q.prev = aq->age_q.prev;
3019
3020 t = (vm_page_t)sq->age_q.next;
3021 t->pageq.prev = &sq->age_q;
3022
3023 t = (vm_page_t)sq->age_q.prev;
3024 t->pageq.next = &sq->age_q;
3025 } else {
3026 t = (vm_page_t)sq->age_q.prev;
3027 t->pageq.next = aq->age_q.next;
3028
3029 t = (vm_page_t)aq->age_q.next;
3030 t->pageq.prev = sq->age_q.prev;
3031
3032 t = (vm_page_t)aq->age_q.prev;
3033 t->pageq.next = &sq->age_q;
3034
3035 sq->age_q.prev = aq->age_q.prev;
3036 }
3037 queue_init(&aq->age_q);
3038 }
3039
3040
3041 void
3042 vm_page_lru(
3043 vm_page_t m)
3044 {
3045 VM_PAGE_CHECK(m);
3046 assert(m->object != kernel_object);
3047 assert(m->phys_page != vm_page_guard_addr);
3048
3049 #if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052 if (m->active || m->reference)
3053 return;
3054
3055 if (m->private || (VM_PAGE_WIRED(m)))
3056 return;
3057
3058 m->no_cache = FALSE;
3059
3060 VM_PAGE_QUEUES_REMOVE(m);
3061
3062 assert(!m->laundry);
3063 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3064
3065 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3066 m->inactive = TRUE;
3067
3068 vm_page_inactive_count++;
3069 token_new_pagecount++;
3070 }
3071
3072
3073 void
3074 vm_page_reactivate_all_throttled(void)
3075 {
3076 vm_page_t first_throttled, last_throttled;
3077 vm_page_t first_active;
3078 vm_page_t m;
3079 int extra_active_count;
3080
3081 extra_active_count = 0;
3082 vm_page_lock_queues();
3083 if (! queue_empty(&vm_page_queue_throttled)) {
3084 /*
3085 * Switch "throttled" pages to "active".
3086 */
3087 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3088 VM_PAGE_CHECK(m);
3089 assert(m->throttled);
3090 assert(!m->active);
3091 assert(!m->inactive);
3092 assert(!m->speculative);
3093 assert(!VM_PAGE_WIRED(m));
3094 if (!m->fictitious) {
3095 extra_active_count++;
3096 }
3097 m->throttled = FALSE;
3098 m->active = TRUE;
3099 VM_PAGE_CHECK(m);
3100 }
3101
3102 /*
3103 * Transfer the entire throttled queue to a regular LRU page queue.
3104 * We insert it at the head of the active queue, so that these pages
3105 * get re-evaluated by the LRU algorithm first, since they've been
3106 * completely out of it until now.
3107 */
3108 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3109 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3110 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3111 if (queue_empty(&vm_page_queue_active)) {
3112 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3113 } else {
3114 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3115 }
3116 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3117 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3118 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3119
3120 #if DEBUG
3121 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3122 #endif
3123 queue_init(&vm_page_queue_throttled);
3124 /*
3125 * Adjust the global page counts.
3126 */
3127 vm_page_active_count += extra_active_count;
3128 vm_page_throttled_count = 0;
3129 }
3130 assert(vm_page_throttled_count == 0);
3131 assert(queue_empty(&vm_page_queue_throttled));
3132 vm_page_unlock_queues();
3133 }
3134
3135
3136 /*
3137 * move pages from the indicated local queue to the global active queue
3138 * it's OK to fail if we're below the hard limit and force == FALSE
3139 * the nolocks == TRUE case is to allow this function to be run on
3140 * the hibernate path
3141 */
3142
3143 void
3144 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3145 {
3146 struct vpl *lq;
3147 vm_page_t first_local, last_local;
3148 vm_page_t first_active;
3149 vm_page_t m;
3150 uint32_t count = 0;
3151
3152 if (vm_page_local_q == NULL)
3153 return;
3154
3155 lq = &vm_page_local_q[lid].vpl_un.vpl;
3156
3157 if (nolocks == FALSE) {
3158 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3159 if ( !vm_page_trylockspin_queues())
3160 return;
3161 } else
3162 vm_page_lockspin_queues();
3163
3164 VPL_LOCK(&lq->vpl_lock);
3165 }
3166 if (lq->vpl_count) {
3167 /*
3168 * Switch "local" pages to "active".
3169 */
3170 assert(!queue_empty(&lq->vpl_queue));
3171
3172 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3173 VM_PAGE_CHECK(m);
3174 assert(m->local);
3175 assert(!m->active);
3176 assert(!m->inactive);
3177 assert(!m->speculative);
3178 assert(!VM_PAGE_WIRED(m));
3179 assert(!m->throttled);
3180 assert(!m->fictitious);
3181
3182 if (m->local_id != lid)
3183 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3184
3185 m->local_id = 0;
3186 m->local = FALSE;
3187 m->active = TRUE;
3188 VM_PAGE_CHECK(m);
3189
3190 count++;
3191 }
3192 if (count != lq->vpl_count)
3193 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3194
3195 /*
3196 * Transfer the entire local queue to a regular LRU page queue.
3197 */
3198 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3199 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3200 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3201
3202 if (queue_empty(&vm_page_queue_active)) {
3203 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3204 } else {
3205 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3206 }
3207 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3208 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3209 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3210
3211 queue_init(&lq->vpl_queue);
3212 /*
3213 * Adjust the global page counts.
3214 */
3215 vm_page_active_count += lq->vpl_count;
3216 lq->vpl_count = 0;
3217 }
3218 assert(queue_empty(&lq->vpl_queue));
3219
3220 if (nolocks == FALSE) {
3221 VPL_UNLOCK(&lq->vpl_lock);
3222 vm_page_unlock_queues();
3223 }
3224 }
3225
3226 /*
3227 * vm_page_part_zero_fill:
3228 *
3229 * Zero-fill a part of the page.
3230 */
3231 void
3232 vm_page_part_zero_fill(
3233 vm_page_t m,
3234 vm_offset_t m_pa,
3235 vm_size_t len)
3236 {
3237 vm_page_t tmp;
3238
3239 VM_PAGE_CHECK(m);
3240 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3241 pmap_zero_part_page(m->phys_page, m_pa, len);
3242 #else
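/*
 * No pmap primitive for partial zeroing: grab a scratch page, zero it,
 * copy the bytes outside [m_pa, m_pa + len) over from the original page,
 * then copy the whole scratch page back onto the original.
 */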
3243 while (1) {
3244 tmp = vm_page_grab();
3245 if (tmp == VM_PAGE_NULL) {
3246 vm_page_wait(THREAD_UNINT);
3247 continue;
3248 }
3249 break;
3250 }
3251 vm_page_zero_fill(tmp);
3252 if(m_pa != 0) {
3253 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3254 }
3255 if((m_pa + len) < PAGE_SIZE) {
3256 vm_page_part_copy(m, m_pa + len, tmp,
3257 m_pa + len, PAGE_SIZE - (m_pa + len));
3258 }
3259 vm_page_copy(tmp,m);
3260 VM_PAGE_FREE(tmp);
3261 #endif
3262
3263 }
3264
3265 /*
3266 * vm_page_zero_fill:
3267 *
3268 * Zero-fill the specified page.
3269 */
3270 void
3271 vm_page_zero_fill(
3272 vm_page_t m)
3273 {
3274 XPR(XPR_VM_PAGE,
3275 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3276 m->object, m->offset, m, 0,0);
3277
3278 VM_PAGE_CHECK(m);
3279
3280 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3281 pmap_zero_page(m->phys_page);
3282 }
3283
3284 /*
3285 * vm_page_part_copy:
3286 *
3287 * copy part of one page to another
3288 */
3289
3290 void
3291 vm_page_part_copy(
3292 vm_page_t src_m,
3293 vm_offset_t src_pa,
3294 vm_page_t dst_m,
3295 vm_offset_t dst_pa,
3296 vm_size_t len)
3297 {
3298 VM_PAGE_CHECK(src_m);
3299 VM_PAGE_CHECK(dst_m);
3300
3301 pmap_copy_part_page(src_m->phys_page, src_pa,
3302 dst_m->phys_page, dst_pa, len);
3303 }
3304
3305 /*
3306 * vm_page_copy:
3307 *
3308 * Copy one page to another
3309 *
3310 * ENCRYPTED SWAP:
3311 * The source page should not be encrypted. The caller should
3312 * make sure the page is decrypted first, if necessary.
3313 */
3314
3315 int vm_page_copy_cs_validations = 0;
3316 int vm_page_copy_cs_tainted = 0;
3317
3318 void
3319 vm_page_copy(
3320 vm_page_t src_m,
3321 vm_page_t dest_m)
3322 {
3323 XPR(XPR_VM_PAGE,
3324 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3325 src_m->object, src_m->offset,
3326 dest_m->object, dest_m->offset,
3327 0);
3328
3329 VM_PAGE_CHECK(src_m);
3330 VM_PAGE_CHECK(dest_m);
3331
3332 /*
3333 * ENCRYPTED SWAP:
3334 * The source page should not be encrypted at this point.
3335 * The destination page will therefore not contain encrypted
3336 * data after the copy.
3337 */
3338 if (src_m->encrypted) {
3339 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3340 }
3341 dest_m->encrypted = FALSE;
3342
3343 if (src_m->object != VM_OBJECT_NULL &&
3344 src_m->object->code_signed) {
3345 /*
3346 * We're copying a page from a code-signed object.
3347 * Whoever ends up mapping the copy page might care about
3348 * the original page's integrity, so let's validate the
3349 * source page now.
3350 */
3351 vm_page_copy_cs_validations++;
3352 vm_page_validate_cs(src_m);
3353 }
3354 /*
3355 * Propagate the cs_tainted bit to the copy page. Do not propagate
3356 * the cs_validated bit.
3357 */
3358 dest_m->cs_tainted = src_m->cs_tainted;
3359 if (dest_m->cs_tainted) {
3360 vm_page_copy_cs_tainted++;
3361 }
3362
3363 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3364 }
3365
3366 #if MACH_ASSERT
3367 static void
3368 _vm_page_print(
3369 vm_page_t p)
3370 {
3371 printf("vm_page %p: \n", p);
3372 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3373 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3374 printf(" next=%p\n", p->next);
3375 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3376 printf(" wire_count=%u\n", p->wire_count);
3377
3378 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3379 (p->local ? "" : "!"),
3380 (p->inactive ? "" : "!"),
3381 (p->active ? "" : "!"),
3382 (p->pageout_queue ? "" : "!"),
3383 (p->speculative ? "" : "!"),
3384 (p->laundry ? "" : "!"));
3385 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3386 (p->free ? "" : "!"),
3387 (p->reference ? "" : "!"),
3388 (p->gobbled ? "" : "!"),
3389 (p->private ? "" : "!"),
3390 (p->throttled ? "" : "!"));
3391 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3392 (p->busy ? "" : "!"),
3393 (p->wanted ? "" : "!"),
3394 (p->tabled ? "" : "!"),
3395 (p->fictitious ? "" : "!"),
3396 (p->pmapped ? "" : "!"),
3397 (p->wpmapped ? "" : "!"));
3398 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3399 (p->pageout ? "" : "!"),
3400 (p->absent ? "" : "!"),
3401 (p->error ? "" : "!"),
3402 (p->dirty ? "" : "!"),
3403 (p->cleaning ? "" : "!"),
3404 (p->precious ? "" : "!"),
3405 (p->clustered ? "" : "!"));
3406 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3407 (p->overwriting ? "" : "!"),
3408 (p->restart ? "" : "!"),
3409 (p->unusual ? "" : "!"),
3410 (p->encrypted ? "" : "!"),
3411 (p->encrypted_cleaning ? "" : "!"));
3412 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3413 (p->list_req_pending ? "" : "!"),
3414 (p->dump_cleaning ? "" : "!"),
3415 (p->cs_validated ? "" : "!"),
3416 (p->cs_tainted ? "" : "!"),
3417 (p->no_cache ? "" : "!"));
3418 printf(" %szero_fill\n",
3419 (p->zero_fill ? "" : "!"));
3420
3421 printf("phys_page=0x%x\n", p->phys_page);
3422 }
3423
3424 /*
3425 * Check that the list of pages is ordered by
3426 * ascending physical address and has no holes.
3427 */
3428 static int
3429 vm_page_verify_contiguous(
3430 vm_page_t pages,
3431 unsigned int npages)
3432 {
3433 register vm_page_t m;
3434 unsigned int page_count;
3435 vm_offset_t prev_addr;
3436
3437 prev_addr = pages->phys_page;
3438 page_count = 1;
3439 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3440 if (m->phys_page != prev_addr + 1) {
3441 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3442 m, (long)prev_addr, m->phys_page);
3443 printf("pages %p page_count %d\n", pages, page_count);
3444 panic("vm_page_verify_contiguous: not contiguous!");
3445 }
3446 prev_addr = m->phys_page;
3447 ++page_count;
3448 }
3449 if (page_count != npages) {
3450 printf("pages %p actual count 0x%x but requested 0x%x\n",
3451 pages, page_count, npages);
3452 panic("vm_page_verify_contiguous: count error");
3453 }
3454 return 1;
3455 }
3456
3457
3458 /*
3459 * Check the free lists for proper length etc.
3460 */
3461 static unsigned int
3462 vm_page_verify_free_list(
3463 queue_head_t *vm_page_queue,
3464 unsigned int color,
3465 vm_page_t look_for_page,
3466 boolean_t expect_page)
3467 {
3468 unsigned int npages;
3469 vm_page_t m;
3470 vm_page_t prev_m;
3471 boolean_t found_page;
3472
3473 found_page = FALSE;
3474 npages = 0;
3475 prev_m = (vm_page_t) vm_page_queue;
3476 queue_iterate(vm_page_queue,
3477 m,
3478 vm_page_t,
3479 pageq) {
3480 if (m == look_for_page) {
3481 found_page = TRUE;
3482 }
3483 if ((vm_page_t) m->pageq.prev != prev_m)
3484 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3485 color, npages, m, m->pageq.prev, prev_m);
3486 if ( ! m->free )
3487 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3488 color, npages, m);
3489 if ( ! m->busy )
3490 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3491 color, npages, m);
3492 if ( color != (unsigned int) -1 && (m->phys_page & vm_color_mask) != color)
3493 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3494 color, npages, m, m->phys_page & vm_color_mask, color);
3495 ++npages;
3496 prev_m = m;
3497 }
3498 if (look_for_page != VM_PAGE_NULL) {
3499 unsigned int other_color;
3500
3501 if (expect_page && !found_page) {
3502 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3503 color, npages, look_for_page, look_for_page->phys_page);
3504 _vm_page_print(look_for_page);
3505 for (other_color = 0;
3506 other_color < vm_colors;
3507 other_color++) {
3508 if (other_color == color)
3509 continue;
3510 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3511 other_color, look_for_page, FALSE);
3512 }
3513 if (color != (unsigned int) -1) {
3514 vm_page_verify_free_list(&vm_lopage_queue_free,
3515 (unsigned int) -1, look_for_page, FALSE);
3516 }
3517
3518 panic("vm_page_verify_free_list(color=%u)\n", color);
3519 }
3520 if (!expect_page && found_page) {
3521 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3522 color, npages, look_for_page, look_for_page->phys_page);
3523 }
3524 }
3525 return npages;
3526 }
3527
3528 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3529 static void
3530 vm_page_verify_free_lists( void )
3531 {
3532 unsigned int color, npages, nlopages;
3533
3534 if (! vm_page_verify_free_lists_enabled)
3535 return;
3536
3537 npages = 0;
3538
3539 lck_mtx_lock(&vm_page_queue_free_lock);
3540
3541 for( color = 0; color < vm_colors; color++ ) {
3542 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3543 color, VM_PAGE_NULL, FALSE);
3544 }
3545
3546 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3547 (unsigned int) -1,
3548 VM_PAGE_NULL, FALSE);
3549 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3550 panic("vm_page_verify_free_lists: "
3551 "npages %u free_count %d nlopages %u lo_free_count %u",
3552 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3553 lck_mtx_unlock(&vm_page_queue_free_lock);
3554 }
3555
3556 void
3557 vm_page_queues_assert(
3558 vm_page_t mem,
3559 int val)
3560 {
3561 if (mem->free + mem->active + mem->inactive + mem->speculative +
3562 mem->throttled + mem->pageout_queue > (val)) {
3563 _vm_page_print(mem);
3564 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3565 }
3566 if (VM_PAGE_WIRED(mem)) {
3567 assert(!mem->active);
3568 assert(!mem->inactive);
3569 assert(!mem->speculative);
3570 assert(!mem->throttled);
3571 }
3572 }
3573 #endif /* MACH_ASSERT */
3574
3575
3576 /*
3577 * CONTIGUOUS PAGE ALLOCATION
3578 *
3579 * Find a region large enough to contain at least n pages
3580 * of contiguous physical memory.
3581 *
3582 * This is done by traversing the vm_page_t array in a linear fashion
3583 * we assume that the vm_page_t array has the available physical pages in an
3584 * ordered, ascending list... this is currently true of all our implementations
3585 * and must remain so... there can be 'holes' in the array... we also can
3586 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3587 * which used to happen via 'vm_page_convert'... that function was no longer
3588 * being called and was removed...
3589 *
3590 * The basic flow consists of stabilizing some of the interesting state of
3591 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3592 * sweep at the beginning of the array looking for pages that meet our criteria
3593 * for a 'stealable' page... currently we are pretty conservative... if the page
3594 * meets this criteria and is physically contiguous to the previous page in the 'run'
3595 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3596 * and start to develop a new run... if at this point we've already considered
3597 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3598 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3599 * to other threads trying to acquire free pages (or move pages from q to q),
3600 * and then continue from the spot we left off... we only make 1 pass through the
3601 * array. Once we have a 'run' that is long enough, we'll go into the loop
3602 * which steals the pages from the queues they're currently on... pages on the free
3603 * queue can be stolen directly... pages that are on any of the other queues
3604 * must be removed from the object they are tabled on... this requires taking the
3605 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3606 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3607 * dump the pages we've currently stolen back to the free list, and pick up our
3608 * scan from the point where we aborted the 'current' run.
3609 *
3610 *
3611 * Requirements:
3612 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3613 *
3614 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3615 *
3616 * Algorithm:
3617 */
3618
3619 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3620
3621
3622 #define RESET_STATE_OF_RUN() \
3623 MACRO_BEGIN \
3624 prevcontaddr = -2; \
3625 start_pnum = -1; \
3626 free_considered = 0; \
3627 substitute_needed = 0; \
3628 npages = 0; \
3629 MACRO_END
3630
3631 /*
3632 * Can we steal in-use (i.e. not free) pages when searching for
3633 * physically-contiguous pages ?
3634 */
3635 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3636
3637 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3638 #if DEBUG
3639 int vm_page_find_contig_debug = 0;
3640 #endif
3641
3642 static vm_page_t
3643 vm_page_find_contiguous(
3644 unsigned int contig_pages,
3645 ppnum_t max_pnum,
3646 ppnum_t pnum_mask,
3647 boolean_t wire,
3648 int flags)
3649 {
3650 vm_page_t m = NULL;
3651 ppnum_t prevcontaddr;
3652 ppnum_t start_pnum;
3653 unsigned int npages, considered, scanned;
3654 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3655 unsigned int idx_last_contig_page_found = 0;
3656 int free_considered, free_available;
3657 int substitute_needed;
3658 boolean_t wrapped;
3659 #if DEBUG
3660 clock_sec_t tv_start_sec, tv_end_sec;
3661 clock_usec_t tv_start_usec, tv_end_usec;
3662 #endif
3663 #if MACH_ASSERT
3664 int yielded = 0;
3665 int dumped_run = 0;
3666 int stolen_pages = 0;
3667 #endif
3668
3669 if (contig_pages == 0)
3670 return VM_PAGE_NULL;
3671
3672 #if MACH_ASSERT
3673 vm_page_verify_free_lists();
3674 #endif
3675 #if DEBUG
3676 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3677 #endif
3678 vm_page_lock_queues();
3679 lck_mtx_lock(&vm_page_queue_free_lock);
3680
3681 RESET_STATE_OF_RUN();
3682
3683 scanned = 0;
3684 considered = 0;
3685 free_available = vm_page_free_count - vm_page_free_reserved;
3686
3687 wrapped = FALSE;
3688
3689 if(flags & KMA_LOMEM)
3690 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3691 else
3692 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3693
3694 orig_last_idx = idx_last_contig_page_found;
3695 last_idx = orig_last_idx;
3696
3697 for (page_idx = last_idx, start_idx = last_idx;
3698 npages < contig_pages && page_idx < vm_pages_count;
3699 page_idx++) {
3700 retry:
3701 if (wrapped &&
3702 npages == 0 &&
3703 page_idx >= orig_last_idx) {
3704 /*
3705 * We're back where we started and we haven't
3706 * found any suitable contiguous range. Let's
3707 * give up.
3708 */
3709 break;
3710 }
3711 scanned++;
3712 m = &vm_pages[page_idx];
3713
3714 assert(!m->fictitious);
3715 assert(!m->private);
3716
3717 if (max_pnum && m->phys_page > max_pnum) {
3718 /* no more low pages... */
3719 break;
3720 }
3721 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3722 /*
3723 * not aligned
3724 */
3725 RESET_STATE_OF_RUN();
3726
3727 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3728 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3729 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3730 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3731 m->pageout) {
3732 /*
3733 * page is in a transient state
3734 * or a state we don't want to deal
3735 * with, so don't consider it which
3736 * means starting a new run
3737 */
3738 RESET_STATE_OF_RUN();
3739
3740 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3741 /*
3742 * page needs to be on one of our queues
3743 * in order for it to be stable behind the
3744 * locks we hold at this point...
3745 * if not, don't consider it which
3746 * means starting a new run
3747 */
3748 RESET_STATE_OF_RUN();
3749
3750 } else if (!m->free && (!m->tabled || m->busy)) {
3751 /*
3752 * pages on the free list are always 'busy'
3753 * so we couldn't test for 'busy' in the check
3754 * for the transient states... pages that are
3755 * 'free' are never 'tabled', so we also couldn't
3756 * test for 'tabled'. So we check here to make
3757 * sure that a non-free page is not busy and is
3758 * tabled on an object...
3759 * if not, don't consider it which
3760 * means starting a new run
3761 */
3762 RESET_STATE_OF_RUN();
3763
3764 } else {
3765 if (m->phys_page != prevcontaddr + 1) {
3766 if ((m->phys_page & pnum_mask) != 0) {
3767 RESET_STATE_OF_RUN();
3768 goto did_consider;
3769 } else {
3770 npages = 1;
3771 start_idx = page_idx;
3772 start_pnum = m->phys_page;
3773 }
3774 } else {
3775 npages++;
3776 }
3777 prevcontaddr = m->phys_page;
3778
3779 VM_PAGE_CHECK(m);
3780 if (m->free) {
3781 free_considered++;
3782 } else {
3783 /*
3784 * This page is not free.
3785 * If we can't steal used pages,
3786 * we have to give up this run
3787 * and keep looking.
3788 * Otherwise, we might need to
3789 * move the contents of this page
3790 * into a substitute page.
3791 */
3792 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3793 if (m->pmapped || m->dirty) {
3794 substitute_needed++;
3795 }
3796 #else
3797 RESET_STATE_OF_RUN();
3798 #endif
3799 }
3800
3801 if ((free_considered + substitute_needed) > free_available) {
3802 /*
3803 * if we let this run continue
3804 * we will end up dropping the vm_page_free_count
3805 * below the reserve limit... we need to abort
3806 * this run, but we can at least re-consider this
3807 * page... thus the jump back to 'retry'
3808 */
3809 RESET_STATE_OF_RUN();
3810
3811 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3812 considered++;
3813 goto retry;
3814 }
3815 /*
3816 * free_available == 0
3817 * so can't consider any free pages... if
3818 * we went to retry in this case, we'd
3819 * get stuck looking at the same page
3820 * w/o making any forward progress
3821 * we also want to take this path if we've already
3822 * reached our limit that controls the lock latency
3823 */
3824 }
3825 }
3826 did_consider:
3827 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3828
3829 lck_mtx_unlock(&vm_page_queue_free_lock);
3830 vm_page_unlock_queues();
3831
3832 mutex_pause(0);
3833
3834 vm_page_lock_queues();
3835 lck_mtx_lock(&vm_page_queue_free_lock);
3836
3837 RESET_STATE_OF_RUN();
3838 /*
3839 * reset our free page limit since we
3840 * dropped the lock protecting the vm_page_free_queue
3841 */
3842 free_available = vm_page_free_count - vm_page_free_reserved;
3843 considered = 0;
3844 #if MACH_ASSERT
3845 yielded++;
3846 #endif
3847 goto retry;
3848 }
3849 considered++;
3850 }
3851 m = VM_PAGE_NULL;
3852
3853 if (npages != contig_pages) {
3854 if (!wrapped) {
3855 /*
3856 * We didn't find a contiguous range but we didn't
3857 * start from the very first page.
3858 * Start again from the very first page.
3859 */
3860 RESET_STATE_OF_RUN();
3861 if( flags & KMA_LOMEM)
3862 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3863 else
3864 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3865 last_idx = 0;
3866 page_idx = last_idx;
3867 wrapped = TRUE;
3868 goto retry;
3869 }
3870 lck_mtx_unlock(&vm_page_queue_free_lock);
3871 } else {
3872 vm_page_t m1;
3873 vm_page_t m2;
3874 unsigned int cur_idx;
3875 unsigned int tmp_start_idx;
3876 vm_object_t locked_object = VM_OBJECT_NULL;
3877 boolean_t abort_run = FALSE;
3878
3879 assert(page_idx - start_idx == contig_pages);
3880
3881 tmp_start_idx = start_idx;
3882
3883 /*
3884 * first pass through to pull the free pages
3885 * off of the free queue so that in case we
3886 * need substitute pages, we won't grab any
3887 * of the free pages in the run... we'll clear
3888 * the 'free' bit in the 2nd pass, and even in
3889 * an abort_run case, we'll collect all of the
3890 * free pages in this run and return them to the free list
3891 */
3892 while (start_idx < page_idx) {
3893
3894 m1 = &vm_pages[start_idx++];
3895
3896 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3897 assert(m1->free);
3898 #endif
3899
3900 if (m1->free) {
3901 unsigned int color;
3902
3903 color = m1->phys_page & vm_color_mask;
3904 #if MACH_ASSERT
3905 vm_page_verify_free_list(&vm_page_queue_free[color],
3906 color, m1, TRUE);
3907 #endif
3908 queue_remove(&vm_page_queue_free[color],
3909 m1,
3910 vm_page_t,
3911 pageq);
3912 m1->pageq.next = NULL;
3913 m1->pageq.prev = NULL;
3914 #if MACH_ASSERT
3915 vm_page_verify_free_list(&vm_page_queue_free[color],
3916 color, VM_PAGE_NULL, FALSE);
3917 #endif
3918 /*
3919 * Clear the "free" bit so that this page
3920 * does not get considered for another
3921 * concurrent physically-contiguous allocation.
3922 */
3923 m1->free = FALSE;
3924 assert(m1->busy);
3925
3926 vm_page_free_count--;
3927 }
3928 }
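/*
 * At this point every free page in the candidate run has been pulled
 * off its per-color free queue with 'free' cleared (and 'busy' still
 * set), so neither vm_page_grab() nor another contiguous scan can
 * take it out from under us; non-free pages in the run are untouched
 * here and get dealt with in the second pass below.
 */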
3929 /*
3930 * adjust global freelist counts
3931 */
3932 if (vm_page_free_count < vm_page_free_count_minimum)
3933 vm_page_free_count_minimum = vm_page_free_count;
3934
3935 if( flags & KMA_LOMEM)
3936 vm_page_lomem_find_contiguous_last_idx = page_idx;
3937 else
3938 vm_page_find_contiguous_last_idx = page_idx;
3939
3940 /*
3941 * we can drop the free queue lock at this point since
3942 * we've pulled any 'free' candidates off of the list
3943 * we need it dropped so that we can do a vm_page_grab
3944 * when substituting for pmapped/dirty pages
3945 */
3946 lck_mtx_unlock(&vm_page_queue_free_lock);
3947
3948 start_idx = tmp_start_idx;
3949 cur_idx = page_idx - 1;
3950
3951 while (start_idx++ < page_idx) {
3952 /*
3953 * must go through the list from back to front
3954 * so that the page list is created in the
3955 * correct order - low -> high phys addresses
3956 */
3957 m1 = &vm_pages[cur_idx--];
3958
3959 assert(!m1->free);
3960 if (m1->object == VM_OBJECT_NULL) {
3961 /*
3962 * page has already been removed from
3963 * the free list in the 1st pass
3964 */
3965 assert(m1->offset == (vm_object_offset_t) -1);
3966 assert(m1->busy);
3967 assert(!m1->wanted);
3968 assert(!m1->laundry);
3969 } else {
3970 vm_object_t object;
3971
3972 if (abort_run == TRUE)
3973 continue;
3974
3975 object = m1->object;
3976
3977 if (object != locked_object) {
3978 if (locked_object) {
3979 vm_object_unlock(locked_object);
3980 locked_object = VM_OBJECT_NULL;
3981 }
3982 if (vm_object_lock_try(object))
3983 locked_object = object;
3984 }
3985 if (locked_object == VM_OBJECT_NULL ||
3986 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3987 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3988 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3989 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3990
3991 if (locked_object) {
3992 vm_object_unlock(locked_object);
3993 locked_object = VM_OBJECT_NULL;
3994 }
3995 tmp_start_idx = cur_idx;
3996 abort_run = TRUE;
3997 continue;
3998 }
3999 if (m1->pmapped || m1->dirty) {
4000 int refmod;
4001 vm_object_offset_t offset;
4002
4003 m2 = vm_page_grab();
4004
4005 if (m2 == VM_PAGE_NULL) {
4006 if (locked_object) {
4007 vm_object_unlock(locked_object);
4008 locked_object = VM_OBJECT_NULL;
4009 }
4010 tmp_start_idx = cur_idx;
4011 abort_run = TRUE;
4012 continue;
4013 }
4014 if (m1->pmapped)
4015 refmod = pmap_disconnect(m1->phys_page);
4016 else
4017 refmod = 0;
4018 vm_page_copy(m1, m2);
4019
4020 m2->reference = m1->reference;
4021 m2->dirty = m1->dirty;
4022
4023 if (refmod & VM_MEM_REFERENCED)
4024 m2->reference = TRUE;
4025 if (refmod & VM_MEM_MODIFIED)
4026 m2->dirty = TRUE;
4027 offset = m1->offset;
4028
4029 /*
4030 * completely cleans up the state
4031 * of the page so that it is ready
4032 * to be put onto the free list...
4033 * for our purposes it looks like it
4034 * just came off of the free list
4035 */
4036 vm_page_free_prepare(m1);
4037
4038 /*
4039 * make sure we clear the ref/mod state
4040 * from the pmap layer... else we risk
4041 * inheriting state from the last time
4042 * this page was used...
4043 */
4044 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4045 /*
4046 * now put the substitute page on the object
4047 */
4048 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
4049
4050 if (m2->reference)
4051 vm_page_activate(m2);
4052 else
4053 vm_page_deactivate(m2);
4054
4055 PAGE_WAKEUP_DONE(m2);
4056
4057 } else {
4058 /*
4059 * completely cleans up the state
4060 * of the page so that it is ready
4061 * to be put onto the free list...
4062 * for our purposes it looks like it
4063 * just came off of the free list
4064 */
4065 vm_page_free_prepare(m1);
4066 }
4067 #if MACH_ASSERT
4068 stolen_pages++;
4069 #endif
4070 }
4071 m1->pageq.next = (queue_entry_t) m;
4072 m1->pageq.prev = NULL;
4073 m = m1;
4074 }
4075 if (locked_object) {
4076 vm_object_unlock(locked_object);
4077 locked_object = VM_OBJECT_NULL;
4078 }
4079
4080 if (abort_run == TRUE) {
4081 if (m != VM_PAGE_NULL) {
4082 vm_page_free_list(m, FALSE);
4083 }
4084 #if MACH_ASSERT
4085 dumped_run++;
4086 #endif
4087 /*
4088 * want the index of the last
4089 * page in this run that was
4090 * successfully 'stolen', so back
4091 * it up 1 for the auto-decrement on use
4092 * and 1 more to bump back over this page
4093 */
4094 page_idx = tmp_start_idx + 2;
4095 if (page_idx >= vm_pages_count) {
4096 if (wrapped)
4097 goto done_scanning;
4098 page_idx = last_idx = 0;
4099 wrapped = TRUE;
4100 }
4101 abort_run = FALSE;
4102
4103 /*
4104 * We couldn't complete this run, so reset
4105 * the state of the run and resume the
4106 * scan from the recomputed page_idx
4107 */
4108 RESET_STATE_OF_RUN();
4109
4110 if( flags & KMA_LOMEM)
4111 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4112 else
4113 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4114
4115 last_idx = page_idx;
4116
4117 lck_mtx_lock(&vm_page_queue_free_lock);
4118 /*
4119 * reset our free page limit since we
4120 * dropped the lock protecting the vm_page_free_queue
4121 */
4122 free_available = vm_page_free_count - vm_page_free_reserved;
4123 goto retry;
4124 }
4125
4126 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4127
4128 if (wire == TRUE)
4129 m1->wire_count++;
4130 else
4131 m1->gobbled = TRUE;
4132 }
4133 if (wire == FALSE)
4134 vm_page_gobble_count += npages;
4135
4136 /*
4137 * gobbled pages are also counted as wired pages
4138 */
4139 vm_page_wire_count += npages;
4140
4141 assert(vm_page_verify_contiguous(m, npages));
4142 }
4143 done_scanning:
4144 vm_page_unlock_queues();
4145
4146 #if DEBUG
4147 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4148
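/*
 * compute the elapsed scan time as (tv_end - tv_start), normalizing
 * the result so that 0 <= tv_end_usec < 1000000
 */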
4149 tv_end_sec -= tv_start_sec;
4150 if (tv_end_usec < tv_start_usec) {
4151 tv_end_sec--;
4152 tv_end_usec += 1000000;
4153 }
4154 tv_end_usec -= tv_start_usec;
4155 if (tv_end_usec >= 1000000) {
4156 tv_end_sec++;
4157 tv_end_usec -= 1000000;
4158 }
4159 if (vm_page_find_contig_debug) {
4160 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4161 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4162 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4163 scanned, yielded, dumped_run, stolen_pages);
4164 }
4165
4166 #endif
4167 #if MACH_ASSERT
4168 vm_page_verify_free_lists();
4169 #endif
4170 return m;
4171 }
4172
4173 /*
4174 * Allocate a list of contiguous, wired pages.
4175 */
4176 kern_return_t
4177 cpm_allocate(
4178 vm_size_t size,
4179 vm_page_t *list,
4180 ppnum_t max_pnum,
4181 ppnum_t pnum_mask,
4182 boolean_t wire,
4183 int flags)
4184 {
4185 vm_page_t pages;
4186 unsigned int npages;
4187
4188 if (size % page_size != 0)
4189 return KERN_INVALID_ARGUMENT;
4190
4191 npages = (unsigned int) (size / PAGE_SIZE);
4192 if (npages != size / PAGE_SIZE) {
4193 /* 32-bit overflow */
4194 return KERN_INVALID_ARGUMENT;
4195 }
4196
4197 /*
4198 * Obtain a pointer to a subset of the free
4199 * list large enough to satisfy the request;
4200 * the region will be physically contiguous.
4201 */
4202 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4203
4204 if (pages == VM_PAGE_NULL)
4205 return KERN_NO_SPACE;
4206 /*
4207 * determine need for wakeups
4208 */
4209 if ((vm_page_free_count < vm_page_free_min) ||
4210 ((vm_page_free_count < vm_page_free_target) &&
4211 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4212 thread_wakeup((event_t) &vm_page_free_wanted);
4213
4214 #if CONFIG_EMBEDDED
4215 {
4216 int percent_avail;
4217
4218 /*
4219 * Decide if we need to poke the memorystatus notification thread.
4220 */
4221 percent_avail =
4222 (vm_page_active_count + vm_page_inactive_count +
4223 vm_page_speculative_count + vm_page_free_count +
4224 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
4225 atop_64(max_mem);
4226 if (percent_avail <= (kern_memorystatus_level - 5)) {
4227 kern_memorystatus_level = percent_avail;
4228 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4229 }
4230 }
4231 #endif
4232 /*
4233 * The CPM pages should now be available and
4234 * ordered by ascending physical address.
4235 */
4236 assert(vm_page_verify_contiguous(pages, npages));
4237
4238 *list = pages;
4239 return KERN_SUCCESS;
4240 }
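/*
 * A minimal illustrative sketch (not taken from any in-tree caller) of
 * how cpm_allocate() is used; the size and the zero max_pnum/pnum_mask
 * arguments are assumed values meant as "no physical address bound" and
 * "no alignment constraint":
 *
 *	vm_page_t	pages;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(16 * PAGE_SIZE,	// size, must be a page multiple
 *			  &pages,		// returned page list
 *			  0,			// max_pnum (assumed: no bound)
 *			  0,			// pnum_mask (assumed: no mask)
 *			  TRUE,			// wire the pages
 *			  0);			// flags
 *	if (kr == KERN_SUCCESS) {
 *		// 'pages' is linked through pageq.next (see NEXT_PAGE) in
 *		// ascending physical address order
 *	}
 */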
4241
4242
4243 kern_return_t
4244 vm_page_alloc_list(
4245 int page_count,
4246 int flags,
4247 vm_page_t *list)
4248 {
4249 vm_page_t lo_page_list = VM_PAGE_NULL;
4250 vm_page_t mem;
4251 int i;
4252
4253 if ( !(flags & KMA_LOMEM))
4254 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4255
4256 for (i = 0; i < page_count; i++) {
4257
4258 mem = vm_page_grablo();
4259
4260 if (mem == VM_PAGE_NULL) {
4261 if (lo_page_list)
4262 vm_page_free_list(lo_page_list, FALSE);
4263
4264 *list = VM_PAGE_NULL;
4265
4266 return (KERN_RESOURCE_SHORTAGE);
4267 }
4268 mem->pageq.next = (queue_entry_t) lo_page_list;
4269 lo_page_list = mem;
4270 }
4271 *list = lo_page_list;
4272
4273 return (KERN_SUCCESS);
4274 }
4275
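/*
 * Simple accessors for individual vm_page fields; these let callers that
 * treat vm_page_t as an opaque handle (presumably outside this module)
 * set a page's offset and walk a page list without knowing the structure
 * layout.
 */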
4276 void
4277 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4278 {
4279 page->offset = offset;
4280 }
4281
4282 vm_page_t
4283 vm_page_get_next(vm_page_t page)
4284 {
4285 return ((vm_page_t) page->pageq.next);
4286 }
4287
4288 vm_object_offset_t
4289 vm_page_get_offset(vm_page_t page)
4290 {
4291 return (page->offset);
4292 }
4293
4294 ppnum_t
4295 vm_page_get_phys_page(vm_page_t page)
4296 {
4297 return (page->phys_page);
4298 }
4299
4300
4301 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4302
4303 #if HIBERNATION
4304
4305 static vm_page_t hibernate_gobble_queue;
4306
4307 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4308
4309 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4310 static int hibernate_flush_dirty_pages(void);
4311 static int hibernate_flush_queue(queue_head_t *, int);
4312 static void hibernate_dirty_page(vm_page_t);
4313
4314 void hibernate_flush_wait(void);
4315 void hibernate_mark_in_progress(void);
4316 void hibernate_clear_in_progress(void);
4317
4318
4319 struct hibernate_statistics {
4320 int hibernate_considered;
4321 int hibernate_reentered_on_q;
4322 int hibernate_found_dirty;
4323 int hibernate_skipped_cleaning;
4324 int hibernate_skipped_transient;
4325 int hibernate_skipped_precious;
4326 int hibernate_queue_nolock;
4327 int hibernate_queue_paused;
4328 int hibernate_throttled;
4329 int hibernate_throttle_timeout;
4330 int hibernate_drained;
4331 int hibernate_drain_timeout;
4332 int cd_lock_failed;
4333 int cd_found_precious;
4334 int cd_found_wired;
4335 int cd_found_busy;
4336 int cd_found_unusual;
4337 int cd_found_cleaning;
4338 int cd_found_laundry;
4339 int cd_found_dirty;
4340 int cd_local_free;
4341 int cd_total_free;
4342 int cd_vm_page_wire_count;
4343 int cd_pages;
4344 int cd_discarded;
4345 int cd_count_wire;
4346 } hibernate_stats;
4347
4348
4349
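/*
 * Wait for the given pageout queue's laundry count to drain to zero,
 * blocking in 5 second intervals; returns 1 if a wait times out, 0 once
 * the queue is fully drained.
 */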
4350 static int
4351 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4352 {
4353 wait_result_t wait_result;
4354
4355 vm_page_lock_queues();
4356
4357 while (q->pgo_laundry) {
4358
4359 q->pgo_draining = TRUE;
4360
4361 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4362
4363 vm_page_unlock_queues();
4364
4365 wait_result = thread_block(THREAD_CONTINUE_NULL);
4366
4367 if (wait_result == THREAD_TIMED_OUT) {
4368 hibernate_stats.hibernate_drain_timeout++;
4369 return (1);
4370 }
4371 vm_page_lock_queues();
4372
4373 hibernate_stats.hibernate_drained++;
4374 }
4375 vm_page_unlock_queues();
4376
4377 return (0);
4378 }
4379
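/*
 * Queue a dirty page onto the internal or external pageout queue so that
 * it gets cleaned before the hibernation image is written, doing the same
 * bookkeeping the pageout path expects (laundry bit, pgo_laundry count,
 * pageout_queue bit) and waking the pageout thread if it is idle.
 */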
4380 static void
4381 hibernate_dirty_page(vm_page_t m)
4382 {
4383 vm_object_t object = m->object;
4384 struct vm_pageout_queue *q;
4385
4386 #if DEBUG
4387 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4388 #endif
4389 vm_object_lock_assert_exclusive(object);
4390
4391 /*
4392 * protect the object from collapse -
4393 * locking in the object's paging_offset.
4394 */
4395 vm_object_paging_begin(object);
4396
4397 m->list_req_pending = TRUE;
4398 m->cleaning = TRUE;
4399 m->busy = TRUE;
4400
4401 if (object->internal == TRUE)
4402 q = &vm_pageout_queue_internal;
4403 else
4404 q = &vm_pageout_queue_external;
4405
4406 /*
4407 * pgo_laundry count is tied to the laundry bit
4408 */
4409 m->laundry = TRUE;
4410 q->pgo_laundry++;
4411
4412 m->pageout_queue = TRUE;
4413 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4414
4415 if (q->pgo_idle == TRUE) {
4416 q->pgo_idle = FALSE;
4417 thread_wakeup((event_t) &q->pgo_pending);
4418 }
4419 }
4420
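/*
 * Walk up to 'qcount' pages of the given paging queue, handing dirty pages
 * to the appropriate pageout queue via hibernate_dirty_page(); pages that
 * can't or shouldn't be cleaned are re-queued, and the walk throttles when
 * the target pageout queue is full.  Returns non-zero if the flush should
 * be abandoned.
 */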
4421 static int
4422 hibernate_flush_queue(queue_head_t *q, int qcount)
4423 {
4424 vm_page_t m;
4425 vm_object_t l_object = NULL;
4426 vm_object_t m_object = NULL;
4427 int refmod_state = 0;
4428 int try_failed_count = 0;
4429 int retval = 0;
4430 int current_run = 0;
4431 struct vm_pageout_queue *iq;
4432 struct vm_pageout_queue *eq;
4433 struct vm_pageout_queue *tq;
4434
4435
4436 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4437
4438 iq = &vm_pageout_queue_internal;
4439 eq = &vm_pageout_queue_external;
4440
4441 vm_page_lock_queues();
4442
4443 while (qcount && !queue_empty(q)) {
4444
4445 if (current_run++ == 1000) {
4446 if (hibernate_should_abort()) {
4447 retval = 1;
4448 break;
4449 }
4450 current_run = 0;
4451 }
4452
4453 m = (vm_page_t) queue_first(q);
4454 m_object = m->object;
4455
4456 /*
4457 * check to see if we currently are working
4458 * with the same object... if so, we've
4459 * already got the lock
4460 */
4461 if (m_object != l_object) {
4462 /*
4463 * the object associated with candidate page is
4464 * different from the one we were just working
4465 * with... dump the lock if we still own it
4466 */
4467 if (l_object != NULL) {
4468 vm_object_unlock(l_object);
4469 l_object = NULL;
4470 }
4471 /*
4472 * Try to lock the object; since we've already got the
4473 * page queues lock, we can only 'try' for this one.
4474 * if the 'try' fails, we need to do a mutex_pause
4475 * to give the owner of the object lock a chance to
4476 * run...
4477 */
4478 if ( !vm_object_lock_try_scan(m_object)) {
4479
4480 if (try_failed_count > 20) {
4481 hibernate_stats.hibernate_queue_nolock++;
4482
4483 goto reenter_pg_on_q;
4484 }
4485 vm_pageout_scan_wants_object = m_object;
4486
4487 vm_page_unlock_queues();
4488 mutex_pause(try_failed_count++);
4489 vm_page_lock_queues();
4490
4491 hibernate_stats.hibernate_queue_paused++;
4492 continue;
4493 } else {
4494 l_object = m_object;
4495 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4496 }
4497 }
4498 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4499 /*
4500 * page is not to be cleaned
4501 * put it back on the head of its queue
4502 */
4503 if (m->cleaning)
4504 hibernate_stats.hibernate_skipped_cleaning++;
4505 else
4506 hibernate_stats.hibernate_skipped_transient++;
4507
4508 goto reenter_pg_on_q;
4509 }
4510 if ( !m_object->pager_initialized && m_object->pager_created)
4511 goto reenter_pg_on_q;
4512
4513 if (m_object->copy == VM_OBJECT_NULL) {
4514 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4515 /*
4516 * let the normal hibernate image path
4517 * deal with these
4518 */
4519 goto reenter_pg_on_q;
4520 }
4521 }
4522 if ( !m->dirty && m->pmapped) {
4523 refmod_state = pmap_get_refmod(m->phys_page);
4524
4525 if ((refmod_state & VM_MEM_MODIFIED))
4526 m->dirty = TRUE;
4527 } else
4528 refmod_state = 0;
4529
4530 if ( !m->dirty) {
4531 /*
4532 * page is not to be cleaned
4533 * put it back on the head of its queue
4534 */
4535 if (m->precious)
4536 hibernate_stats.hibernate_skipped_precious++;
4537
4538 goto reenter_pg_on_q;
4539 }
4540 tq = NULL;
4541
4542 if (m_object->internal) {
4543 if (VM_PAGE_Q_THROTTLED(iq))
4544 tq = iq;
4545 } else if (VM_PAGE_Q_THROTTLED(eq))
4546 tq = eq;
4547
4548 if (tq != NULL) {
4549 wait_result_t wait_result;
4550 int wait_count = 5;
4551
4552 if (l_object != NULL) {
4553 vm_object_unlock(l_object);
4554 l_object = NULL;
4555 }
4556 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4557
4558 tq->pgo_throttled = TRUE;
4559
4560 while (retval == 0) {
4561
4562 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4563
4564 vm_page_unlock_queues();
4565
4566 wait_result = thread_block(THREAD_CONTINUE_NULL);
4567
4568 vm_page_lock_queues();
4569
4570 if (hibernate_should_abort())
4571 retval = 1;
4572
4573 if (wait_result != THREAD_TIMED_OUT)
4574 break;
4575
4576 if (--wait_count == 0) {
4577 hibernate_stats.hibernate_throttle_timeout++;
4578 retval = 1;
4579 }
4580 }
4581 if (retval)
4582 break;
4583
4584 hibernate_stats.hibernate_throttled++;
4585
4586 continue;
4587 }
4588 VM_PAGE_QUEUES_REMOVE(m);
4589
4590 hibernate_dirty_page(m);
4591
4592 hibernate_stats.hibernate_found_dirty++;
4593
4594 goto next_pg;
4595
4596 reenter_pg_on_q:
4597 queue_remove(q, m, vm_page_t, pageq);
4598 queue_enter(q, m, vm_page_t, pageq);
4599
4600 hibernate_stats.hibernate_reentered_on_q++;
4601 next_pg:
4602 hibernate_stats.hibernate_considered++;
4603
4604 qcount--;
4605 try_failed_count = 0;
4606 }
4607 if (l_object != NULL) {
4608 vm_object_unlock(l_object);
4609 l_object = NULL;
4610 }
4611 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4612
4613 vm_page_unlock_queues();
4614
4615 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4616
4617 return (retval);
4618 }
4619
4620
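/*
 * Flush dirty pages from the speculative, active, inactive and zero-fill
 * queues, then drain the internal and external pageout queues; a non-zero
 * return means the flush was aborted or timed out.
 */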
4621 static int
4622 hibernate_flush_dirty_pages()
4623 {
4624 struct vm_speculative_age_q *aq;
4625 uint32_t i;
4626
4627 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4628
4629 if (vm_page_local_q) {
4630 for (i = 0; i < vm_page_local_q_count; i++)
4631 vm_page_reactivate_local(i, TRUE, FALSE);
4632 }
4633
4634 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4635 int qcount;
4636 vm_page_t m;
4637
4638 aq = &vm_page_queue_speculative[i];
4639
4640 if (queue_empty(&aq->age_q))
4641 continue;
4642 qcount = 0;
4643
4644 vm_page_lockspin_queues();
4645
4646 queue_iterate(&aq->age_q,
4647 m,
4648 vm_page_t,
4649 pageq)
4650 {
4651 qcount++;
4652 }
4653 vm_page_unlock_queues();
4654
4655 if (qcount) {
4656 if (hibernate_flush_queue(&aq->age_q, qcount))
4657 return (1);
4658 }
4659 }
4660 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4661 return (1);
4662 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4663 return (1);
4664 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4665 return (1);
4666
4667 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4668 return (1);
4669 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4670 }
4671
4672
4673 extern void IOSleep(unsigned int);
4674 extern int sync_internal(void);
4675
4676 int
4677 hibernate_flush_memory()
4678 {
4679 int retval;
4680
4681 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4682
4683 IOSleep(2 * 1000);
4684
4685 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4686
4687 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4688 if (consider_buffer_cache_collect != NULL) {
4689
4690 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4691
4692 sync_internal();
4693 (void)(*consider_buffer_cache_collect)(1);
4694 consider_zone_gc(1);
4695
4696 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4697 }
4698 }
4699 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4700
4701 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4702 hibernate_stats.hibernate_considered,
4703 hibernate_stats.hibernate_reentered_on_q,
4704 hibernate_stats.hibernate_found_dirty);
4705 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4706 hibernate_stats.hibernate_skipped_cleaning,
4707 hibernate_stats.hibernate_skipped_transient,
4708 hibernate_stats.hibernate_skipped_precious,
4709 hibernate_stats.hibernate_queue_nolock);
4710 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4711 hibernate_stats.hibernate_queue_paused,
4712 hibernate_stats.hibernate_throttled,
4713 hibernate_stats.hibernate_throttle_timeout,
4714 hibernate_stats.hibernate_drained,
4715 hibernate_stats.hibernate_drain_timeout);
4716
4717 return (retval);
4718 }
4719
4720 static void
4721 hibernate_page_list_zero(hibernate_page_list_t *list)
4722 {
4723 uint32_t bank;
4724 hibernate_bitmap_t * bitmap;
4725
4726 bitmap = &list->bank_bitmap[0];
4727 for (bank = 0; bank < list->bank_count; bank++)
4728 {
4729 uint32_t last_bit;
4730
4731 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4732 // set out-of-bound bits at end of bitmap.
4733 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4734 if (last_bit)
4735 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4736
4737 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4738 }
4739 }
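/*
 * Worked example (assuming the most-significant-bit-first page indexing
 * used by hibernate_page_bitset): for a bank spanning 40 pages,
 * last_bit = 40 & 31 = 8, so the final word becomes
 * 0xFFFFFFFF >> 8 = 0x00FFFFFF, i.e. the 24 low-order bit positions that
 * correspond to no real page are pre-set ("no need to save") while the 8
 * positions backing real pages remain zero.
 */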
4740
4741 void
4742 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4743 {
4744 uint32_t i;
4745 vm_page_t m;
4746 uint64_t start, end, timeout, nsec;
4747 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4748 clock_get_uptime(&start);
4749
4750 for (i = 0; i < gobble_count; i++)
4751 {
4752 while (VM_PAGE_NULL == (m = vm_page_grab()))
4753 {
4754 clock_get_uptime(&end);
4755 if (end >= timeout)
4756 break;
4757 VM_PAGE_WAIT();
4758 }
4759 if (!m)
4760 break;
4761 m->busy = FALSE;
4762 vm_page_gobble(m);
4763
4764 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4765 hibernate_gobble_queue = m;
4766 }
4767
4768 clock_get_uptime(&end);
4769 absolutetime_to_nanoseconds(end - start, &nsec);
4770 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4771 }
4772
4773 void
4774 hibernate_free_gobble_pages(void)
4775 {
4776 vm_page_t m, next;
4777 uint32_t count = 0;
4778
4779 m = (vm_page_t) hibernate_gobble_queue;
4780 while(m)
4781 {
4782 next = (vm_page_t) m->pageq.next;
4783 vm_page_free(m);
4784 count++;
4785 m = next;
4786 }
4787 hibernate_gobble_queue = VM_PAGE_NULL;
4788
4789 if (count)
4790 HIBLOG("Freed %d pages\n", count);
4791 }
4792
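/*
 * Decide whether a page can be left out of the hibernation image and
 * simply discarded on wakeup: clean pages and pages belonging to volatile
 * or empty purgeable objects qualify; wired, precious, busy, unusual,
 * cleaning and laundry pages do not.
 */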
4793 static boolean_t
4794 hibernate_consider_discard(vm_page_t m)
4795 {
4796 vm_object_t object = NULL;
4797 int refmod_state;
4798 boolean_t discard = FALSE;
4799
4800 do
4801 {
4802 if (m->private)
4803 panic("hibernate_consider_discard: private");
4804
4805 if (!vm_object_lock_try(m->object)) {
4806 hibernate_stats.cd_lock_failed++;
4807 break;
4808 }
4809 object = m->object;
4810
4811 if (VM_PAGE_WIRED(m)) {
4812 hibernate_stats.cd_found_wired++;
4813 break;
4814 }
4815 if (m->precious) {
4816 hibernate_stats.cd_found_precious++;
4817 break;
4818 }
4819 if (m->busy || !object->alive) {
4820 /*
4821 * Somebody is playing with this page.
4822 */
4823 hibernate_stats.cd_found_busy++;
4824 break;
4825 }
4826 if (m->absent || m->unusual || m->error) {
4827 /*
4828 * If it's unusual in any way, ignore it
4829 */
4830 hibernate_stats.cd_found_unusual++;
4831 break;
4832 }
4833 if (m->cleaning) {
4834 hibernate_stats.cd_found_cleaning++;
4835 break;
4836 }
4837 if (m->laundry || m->list_req_pending) {
4838 hibernate_stats.cd_found_laundry++;
4839 break;
4840 }
4841 if (!m->dirty)
4842 {
4843 refmod_state = pmap_get_refmod(m->phys_page);
4844
4845 if (refmod_state & VM_MEM_REFERENCED)
4846 m->reference = TRUE;
4847 if (refmod_state & VM_MEM_MODIFIED)
4848 m->dirty = TRUE;
4849 }
4850
4851 /*
4852 * If it's clean or purgeable we can discard the page on wakeup.
4853 */
4854 discard = (!m->dirty)
4855 || (VM_PURGABLE_VOLATILE == object->purgable)
4856 || (VM_PURGABLE_EMPTY == object->purgable);
4857
4858 if (discard == FALSE)
4859 hibernate_stats.cd_found_dirty++;
4860 }
4861 while (FALSE);
4862
4863 if (object)
4864 vm_object_unlock(object);
4865
4866 return (discard);
4867 }
4868
4869
4870 static void
4871 hibernate_discard_page(vm_page_t m)
4872 {
4873 if (m->absent || m->unusual || m->error)
4874 /*
4875 * If it's unusual in any way, ignore it
4876 */
4877 return;
4878
4879 if (m->pmapped == TRUE)
4880 {
4881 __unused int refmod_state = pmap_disconnect(m->phys_page);
4882 }
4883
4884 if (m->laundry)
4885 panic("hibernate_discard_page(%p) laundry", m);
4886 if (m->private)
4887 panic("hibernate_discard_page(%p) private", m);
4888 if (m->fictitious)
4889 panic("hibernate_discard_page(%p) fictitious", m);
4890
4891 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4892 {
4893 /* object should be on a queue */
4894 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4895 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4896 assert(old_queue);
4897 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4898 makes sure these locks are uncontended before sleep */
4899 vm_purgeable_token_delete_first(old_queue);
4900 m->object->purgable = VM_PURGABLE_EMPTY;
4901 }
4902
4903 vm_page_free(m);
4904 }
4905
4906 /*
4907 Bits zero in the bitmaps => the page needs to be saved. All pages default to being saved;
4908 pages known to the VM not to need saving are subtracted.
4909 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
4910 */
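/*
 * A minimal sketch restating that convention with the existing bitmap
 * helpers (ppnum and m are placeholders):
 *
 *	// mark ppnum as NOT needing to be saved in the image
 *	hibernate_page_bitset(page_list, TRUE, ppnum);
 *
 *	// on wakeup, a set bit means the contents were never saved,
 *	// so the resident page can simply be discarded
 *	if (hibernate_page_bittst(page_list, m->phys_page))
 *		hibernate_discard_page(m);
 */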
4911
4912 void
4913 hibernate_page_list_setall(hibernate_page_list_t * page_list,
4914 hibernate_page_list_t * page_list_wired,
4915 uint32_t * pagesOut)
4916 {
4917 uint64_t start, end, nsec;
4918 vm_page_t m;
4919 uint32_t pages = page_list->page_count;
4920 uint32_t count_zf = 0, count_throttled = 0;
4921 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4922 uint32_t count_wire = pages;
4923 uint32_t count_discard_active = 0;
4924 uint32_t count_discard_inactive = 0;
4925 uint32_t count_discard_purgeable = 0;
4926 uint32_t count_discard_speculative = 0;
4927 uint32_t i;
4928 uint32_t bank;
4929 hibernate_bitmap_t * bitmap;
4930 hibernate_bitmap_t * bitmap_wired;
4931
4932
4933 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
4934
4935 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
4936
4937 clock_get_uptime(&start);
4938
4939 hibernate_page_list_zero(page_list);
4940 hibernate_page_list_zero(page_list_wired);
4941
4942 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
4943 hibernate_stats.cd_pages = pages;
4944
4945 if (vm_page_local_q) {
4946 for (i = 0; i < vm_page_local_q_count; i++)
4947 vm_page_reactivate_local(i, TRUE, TRUE);
4948 }
4949
4950 m = (vm_page_t) hibernate_gobble_queue;
4951 while(m)
4952 {
4953 pages--;
4954 count_wire--;
4955 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4956 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4957 m = (vm_page_t) m->pageq.next;
4958 }
4959 #ifndef PPC
4960 for( i = 0; i < real_ncpus; i++ )
4961 {
4962 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
4963 {
4964 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
4965 {
4966 pages--;
4967 count_wire--;
4968 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4969 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4970
4971 hibernate_stats.cd_local_free++;
4972 hibernate_stats.cd_total_free++;
4973 }
4974 }
4975 }
4976 #endif
4977 for( i = 0; i < vm_colors; i++ )
4978 {
4979 queue_iterate(&vm_page_queue_free[i],
4980 m,
4981 vm_page_t,
4982 pageq)
4983 {
4984 pages--;
4985 count_wire--;
4986 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4987 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4988
4989 hibernate_stats.cd_total_free++;
4990 }
4991 }
4992
4993 queue_iterate(&vm_lopage_queue_free,
4994 m,
4995 vm_page_t,
4996 pageq)
4997 {
4998 pages--;
4999 count_wire--;
5000 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5001 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5002
5003 hibernate_stats.cd_total_free++;
5004 }
5005
5006 queue_iterate( &vm_page_queue_throttled,
5007 m,
5008 vm_page_t,
5009 pageq )
5010 {
5011 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5012 && hibernate_consider_discard(m))
5013 {
5014 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5015 count_discard_inactive++;
5016 }
5017 else
5018 count_throttled++;
5019 count_wire--;
5020 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5021 }
5022
5023 queue_iterate( &vm_page_queue_zf,
5024 m,
5025 vm_page_t,
5026 pageq )
5027 {
5028 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5029 && hibernate_consider_discard(m))
5030 {
5031 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5032 if (m->dirty)
5033 count_discard_purgeable++;
5034 else
5035 count_discard_inactive++;
5036 }
5037 else
5038 count_zf++;
5039 count_wire--;
5040 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5041 }
5042
5043 queue_iterate( &vm_page_queue_inactive,
5044 m,
5045 vm_page_t,
5046 pageq )
5047 {
5048 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5049 && hibernate_consider_discard(m))
5050 {
5051 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5052 if (m->dirty)
5053 count_discard_purgeable++;
5054 else
5055 count_discard_inactive++;
5056 }
5057 else
5058 count_inactive++;
5059 count_wire--;
5060 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5061 }
5062
5063 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5064 {
5065 queue_iterate(&vm_page_queue_speculative[i].age_q,
5066 m,
5067 vm_page_t,
5068 pageq)
5069 {
5070 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5071 && hibernate_consider_discard(m))
5072 {
5073 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5074 count_discard_speculative++;
5075 }
5076 else
5077 count_speculative++;
5078 count_wire--;
5079 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5080 }
5081 }
5082
5083 queue_iterate( &vm_page_queue_active,
5084 m,
5085 vm_page_t,
5086 pageq )
5087 {
5088 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5089 && hibernate_consider_discard(m))
5090 {
5091 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5092 if (m->dirty)
5093 count_discard_purgeable++;
5094 else
5095 count_discard_active++;
5096 }
5097 else
5098 count_active++;
5099 count_wire--;
5100 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5101 }
5102
5103 // pull wired from hibernate_bitmap
5104
5105 bitmap = &page_list->bank_bitmap[0];
5106 bitmap_wired = &page_list_wired->bank_bitmap[0];
5107 for (bank = 0; bank < page_list->bank_count; bank++)
5108 {
5109 for (i = 0; i < bitmap->bitmapwords; i++)
5110 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5111 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5112 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5113 }
5114
5115 // machine dependent adjustments
5116 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5117
5118 hibernate_stats.cd_count_wire = count_wire;
5119 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5120
5121 clock_get_uptime(&end);
5122 absolutetime_to_nanoseconds(end - start, &nsec);
5123 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5124
5125 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5126 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5127 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5128
5129 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5130
5131 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5132 }
5133
5134 void
5135 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5136 {
5137 uint64_t start, end, nsec;
5138 vm_page_t m;
5139 vm_page_t next;
5140 uint32_t i;
5141 uint32_t count_discard_active = 0;
5142 uint32_t count_discard_inactive = 0;
5143 uint32_t count_discard_purgeable = 0;
5144 uint32_t count_discard_speculative = 0;
5145
5146 clock_get_uptime(&start);
5147
5148 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5149 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5150 {
5151 next = (vm_page_t) m->pageq.next;
5152 if (hibernate_page_bittst(page_list, m->phys_page))
5153 {
5154 if (m->dirty)
5155 count_discard_purgeable++;
5156 else
5157 count_discard_inactive++;
5158 hibernate_discard_page(m);
5159 }
5160 m = next;
5161 }
5162
5163 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5164 {
5165 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5166 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5167 {
5168 next = (vm_page_t) m->pageq.next;
5169 if (hibernate_page_bittst(page_list, m->phys_page))
5170 {
5171 count_discard_speculative++;
5172 hibernate_discard_page(m);
5173 }
5174 m = next;
5175 }
5176 }
5177
5178 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5179 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5180 {
5181 next = (vm_page_t) m->pageq.next;
5182 if (hibernate_page_bittst(page_list, m->phys_page))
5183 {
5184 if (m->dirty)
5185 count_discard_purgeable++;
5186 else
5187 count_discard_inactive++;
5188 hibernate_discard_page(m);
5189 }
5190 m = next;
5191 }
5192
5193 m = (vm_page_t) queue_first(&vm_page_queue_active);
5194 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5195 {
5196 next = (vm_page_t) m->pageq.next;
5197 if (hibernate_page_bittst(page_list, m->phys_page))
5198 {
5199 if (m->dirty)
5200 count_discard_purgeable++;
5201 else
5202 count_discard_active++;
5203 hibernate_discard_page(m);
5204 }
5205 m = next;
5206 }
5207
5208 clock_get_uptime(&end);
5209 absolutetime_to_nanoseconds(end - start, &nsec);
5210 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5211 nsec / 1000000ULL,
5212 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5213 }
5214
5215 #endif /* HIBERNATION */
5216
5217 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5218
5219 #include <mach_vm_debug.h>
5220 #if MACH_VM_DEBUG
5221
5222 #include <mach_debug/hash_info.h>
5223 #include <vm/vm_debug.h>
5224
5225 /*
5226 * Routine: vm_page_info
5227 * Purpose:
5228 * Return information about the global VP table.
5229 * Fills the buffer with as much information as possible
5230 * and returns the desired size of the buffer.
5231 * Conditions:
5232 * Nothing locked. The caller should provide
5233 * possibly-pageable memory.
5234 */
5235
5236 unsigned int
5237 vm_page_info(
5238 hash_info_bucket_t *info,
5239 unsigned int count)
5240 {
5241 unsigned int i;
5242 lck_spin_t *bucket_lock;
5243
5244 if (vm_page_bucket_count < count)
5245 count = vm_page_bucket_count;
5246
5247 for (i = 0; i < count; i++) {
5248 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5249 unsigned int bucket_count = 0;
5250 vm_page_t m;
5251
5252 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5253 lck_spin_lock(bucket_lock);
5254
5255 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5256 bucket_count++;
5257
5258 lck_spin_unlock(bucket_lock);
5259
5260 /* don't touch pageable memory while holding locks */
5261 info[i].hib_count = bucket_count;
5262 }
5263
5264 return vm_page_bucket_count;
5265 }
5266 #endif /* MACH_VM_DEBUG */
5267
5268 #include <mach_kdb.h>
5269 #if MACH_KDB
5270
5271 #include <ddb/db_output.h>
5272 #include <vm/vm_print.h>
5273 #define printf kdbprintf
5274
5275 /*
5276 * Routine: vm_page_print [exported]
5277 */
5278 void
5279 vm_page_print(
5280 db_addr_t db_addr)
5281 {
5282 vm_page_t p;
5283
5284 p = (vm_page_t) (long) db_addr;
5285
5286 iprintf("page 0x%x\n", p);
5287
5288 db_indent += 2;
5289
5290 iprintf("object=0x%x", p->object);
5291 printf(", offset=0x%x", p->offset);
5292 printf(", wire_count=%d", p->wire_count);
5293
5294 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5295 (p->local ? "" : "!"),
5296 (p->inactive ? "" : "!"),
5297 (p->active ? "" : "!"),
5298 (p->throttled ? "" : "!"),
5299 (p->gobbled ? "" : "!"),
5300 (p->laundry ? "" : "!"),
5301 (p->free ? "" : "!"),
5302 (p->reference ? "" : "!"),
5303 (p->encrypted ? "" : "!"));
5304 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5305 (p->busy ? "" : "!"),
5306 (p->wanted ? "" : "!"),
5307 (p->tabled ? "" : "!"),
5308 (p->fictitious ? "" : "!"),
5309 (p->private ? "" : "!"),
5310 (p->precious ? "" : "!"));
5311 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5312 (p->absent ? "" : "!"),
5313 (p->error ? "" : "!"),
5314 (p->dirty ? "" : "!"),
5315 (p->cleaning ? "" : "!"),
5316 (p->pageout ? "" : "!"),
5317 (p->clustered ? "" : "!"));
5318 iprintf("%soverwriting, %srestart, %sunusual\n",
5319 (p->overwriting ? "" : "!"),
5320 (p->restart ? "" : "!"),
5321 (p->unusual ? "" : "!"));
5322
5323 iprintf("phys_page=0x%x", p->phys_page);
5324
5325 db_indent -= 2;
5326 }
5327 #endif /* MACH_KDB */