osfmk/vm/vm_resident.c

   1 /*
   2  * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_page.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *
  62  *      Resident memory management module.
  63  */
  64
  65 #include <debug.h>
  66 #include <libkern/OSAtomic.h>
  67
  68 #include <mach/clock_types.h>
  69 #include <mach/vm_prot.h>
  70 #include <mach/vm_statistics.h>
  71 #include <mach/sdt.h>
  72 #include <kern/counters.h>
  73 #include <kern/sched_prim.h>
  74 #include <kern/task.h>
  75 #include <kern/thread.h>
  76 #include <kern/kalloc.h>
  77 #include <kern/zalloc.h>
  78 #include <kern/xpr.h>
  79 #include <vm/pmap.h>
  80 #include <vm/vm_init.h>
  81 #include <vm/vm_map.h>
  82 #include <vm/vm_page.h>
  83 #include <vm/vm_pageout.h>
  84 #include <vm/vm_kern.h>                 /* kernel_memory_allocate() */
  85 #include <kern/misc_protos.h>
  86 #include <zone_debug.h>
  87 #include <vm/cpm.h>
  88 #include <pexpert/pexpert.h>
  89
  90 #include <vm/vm_protos.h>
  91 #include <vm/memory_object.h>
  92 #include <vm/vm_purgeable_internal.h>
  93
  94 #include <IOKit/IOHibernatePrivate.h>
  95
  96 #include <sys/kdebug.h>
  97
  98 boolean_t       hibernate_cleaning_in_progress = FALSE;
  99 boolean_t       vm_page_free_verify = TRUE;
 100
 101 uint32_t        vm_lopage_free_count = 0;
 102 uint32_t        vm_lopage_free_limit = 0;
 103 uint32_t        vm_lopage_lowater    = 0;
 104 boolean_t       vm_lopage_refill = FALSE;
 105 boolean_t       vm_lopage_needed = FALSE;
 106
 107 lck_mtx_ext_t   vm_page_queue_lock_ext;
 108 lck_mtx_ext_t   vm_page_queue_free_lock_ext;
 109 lck_mtx_ext_t   vm_purgeable_queue_lock_ext;
 110
 111 int             speculative_age_index = 0;
 112 int             speculative_steal_index = 0;
 113 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
 114
 115
 116 __private_extern__ void         vm_page_init_lck_grp(void);
 117
 118 static void             vm_page_free_prepare(vm_page_t  page);
 119 static vm_page_t        vm_page_grab_fictitious_common(ppnum_t phys_addr);
 120
 121
 122
 123
 124 /*
 125  *      Associated with page of user-allocatable memory is a
 126  *      page structure.
 127  */
 128
 129 /*
 130  *      These variables record the values returned by vm_page_bootstrap,
 131  *      for debugging purposes.  The implementation of pmap_steal_memory
 132  *      and pmap_startup here also uses them internally.
 133  */
 134
 135 vm_offset_t virtual_space_start;
 136 vm_offset_t virtual_space_end;
 137 uint32_t        vm_page_pages;
 138
 139 /*
 140  *      The vm_page_lookup() routine, which provides for fast
 141  *      (virtual memory object, offset) to page lookup, employs
 142  *      the following hash table.  The vm_page_{insert,remove}
 143  *      routines install and remove associations in the table.
 144  *      [This table is often called the virtual-to-physical,
 145  *      or VP, table.]
 146  */
 147 typedef struct {
 148         vm_page_t       pages;
 149 #if     MACH_PAGE_HASH_STATS
 150         int             cur_count;              /* current count */
 151         int             hi_count;               /* high water mark */
 152 #endif /* MACH_PAGE_HASH_STATS */
 153 } vm_page_bucket_t;
 154
 155
 156 #define BUCKETS_PER_LOCK        16
 157
 158 vm_page_bucket_t *vm_page_buckets;              /* Array of buckets */
 159 unsigned int    vm_page_bucket_count = 0;       /* How big is array? */
 160 unsigned int    vm_page_hash_mask;              /* Mask for hash function */
 161 unsigned int    vm_page_hash_shift;             /* Shift for hash function */
 162 uint32_t        vm_page_bucket_hash;            /* Basic bucket hash */
 163 unsigned int    vm_page_bucket_lock_count = 0;          /* How big is array of locks? */
 164
 165 lck_spin_t      *vm_page_bucket_locks;
 166
 167
 168 #if     MACH_PAGE_HASH_STATS
 169 /* This routine is only for debug.  It is intended to be called by
 170  * hand by a developer using a kernel debugger.  This routine prints
 171  * out vm_page_hash table statistics to the kernel debug console.
 172  */
 173 void
 174 hash_debug(void)
 175 {
 176         int     i;
 177         int     numbuckets = 0;
 178         int     highsum = 0;
 179         int     maxdepth = 0;
 180
 181         for (i = 0; i < vm_page_bucket_count; i++) {
 182                 if (vm_page_buckets[i].hi_count) {
 183                         numbuckets++;
 184                         highsum += vm_page_buckets[i].hi_count;
 185                         if (vm_page_buckets[i].hi_count > maxdepth)
 186                                 maxdepth = vm_page_buckets[i].hi_count;
 187                 }
 188         }
 189         printf("Total number of buckets: %d\n", vm_page_bucket_count);
 190         printf("Number used buckets:     %d = %d%%\n",
 191                 numbuckets, 100*numbuckets/vm_page_bucket_count);
 192         printf("Number unused buckets:   %d = %d%%\n",
 193                 vm_page_bucket_count - numbuckets,
 194                 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
 195         printf("Sum of bucket max depth: %d\n", highsum);
 196         printf("Average bucket depth:    %d.%2d\n",
 197                 highsum/vm_page_bucket_count,
 198                 highsum%vm_page_bucket_count);
 199         printf("Maximum bucket depth:    %d\n", maxdepth);
 200 }
 201 #endif /* MACH_PAGE_HASH_STATS */
 202
 203 /*
 204  *      The virtual page size is currently implemented as a runtime
 205  *      variable, but is constant once initialized using vm_set_page_size.
 206  *      This initialization must be done in the machine-dependent
 207  *      bootstrap sequence, before calling other machine-independent
 208  *      initializations.
 209  *
 210  *      All references to the virtual page size outside this
 211  *      module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 212  *      constants.
 213  */
 214 vm_size_t       page_size  = PAGE_SIZE;
 215 vm_size_t       page_mask  = PAGE_MASK;
 216 int             page_shift = PAGE_SHIFT;
 217
 218 /*
 219  *      Resident page structures are initialized from
 220  *      a template (see vm_page_alloc).
 221  *
 222  *      When adding a new field to the virtual memory
 223  *      object structure, be sure to add initialization
 224  *      (see vm_page_bootstrap).
 225  */
 226 struct vm_page  vm_page_template;
 227
 228 vm_page_t       vm_pages = VM_PAGE_NULL;
 229 unsigned int    vm_pages_count = 0;
 230 ppnum_t         vm_page_lowest = 0;
 231
 232 /*
 233  *      Resident pages that represent real memory
 234  *      are allocated from a set of free lists,
 235  *      one per color.
 236  */
 237 unsigned int    vm_colors;
 238 unsigned int    vm_color_mask;                  /* mask is == (vm_colors-1) */
 239 unsigned int    vm_cache_geometry_colors = 0;   /* set by hw dependent code during startup */
 240 queue_head_t    vm_page_queue_free[MAX_COLORS];
 241 unsigned int    vm_page_free_wanted;
 242 unsigned int    vm_page_free_wanted_privileged;
 243 unsigned int    vm_page_free_count;
 244 unsigned int    vm_page_fictitious_count;
 245
 246 unsigned int    vm_page_free_count_minimum;     /* debugging */
 247
 248 /*
 249  *      Occasionally, the virtual memory system uses
 250  *      resident page structures that do not refer to
 251  *      real pages, for example to leave a page with
 252  *      important state information in the VP table.
 253  *
 254  *      These page structures are allocated the way
 255  *      most other kernel structures are.
 256  */
 257 zone_t  vm_page_zone;
 258 vm_locks_array_t vm_page_locks;
 259 decl_lck_mtx_data(,vm_page_alloc_lock)
 260 lck_mtx_ext_t vm_page_alloc_lock_ext;
 261
 262 unsigned int io_throttle_zero_fill;
 263
 264 unsigned int    vm_page_local_q_count = 0;
 265 unsigned int    vm_page_local_q_soft_limit = 250;
 266 unsigned int    vm_page_local_q_hard_limit = 500;
 267 struct vplq     *vm_page_local_q = NULL;
 268
 269 /* N.B. Guard and fictitious pages must not
 270  * be assigned a zero phys_page value.
 271  */
 272 /*
 273  *      Fictitious pages don't have a physical address,
 274  *      but we must initialize phys_page to something.
 275  *      For debugging, this should be a strange value
 276  *      that the pmap module can recognize in assertions.
 277  */
 278 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
 279
 280 /*
 281  *      Guard pages are not accessible so they don't
 282  *      need a physical address, but we need to enter
 283  *      one in the pmap.
 284  *      Let's make it recognizable and make sure that
 285  *      we don't use a real physical page with that
 286  *      physical address.
 287  */
 288 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
 289
 290 /*
 291  *      Resident page structures are also chained on
 292  *      queues that are used by the page replacement
 293  *      system (pageout daemon).  These queues are
 294  *      defined here, but are shared by the pageout
 295  *      module.  The inactive queue is broken into
 296  *      inactive and zf for convenience as the
 297  *      pageout daemon often assignes a higher
 298  *      affinity to zf pages
 299  */
 300 queue_head_t    vm_page_queue_active;
 301 queue_head_t    vm_page_queue_inactive;
 302 queue_head_t    vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
 303 queue_head_t    vm_page_queue_throttled;
 304
 305 unsigned int    vm_page_active_count;
 306 unsigned int    vm_page_inactive_count;
 307 unsigned int    vm_page_anonymous_count;
 308 unsigned int    vm_page_throttled_count;
 309 unsigned int    vm_page_speculative_count;
 310 unsigned int    vm_page_wire_count;
 311 unsigned int    vm_page_wire_count_initial;
 312 unsigned int    vm_page_gobble_count = 0;
 313 unsigned int    vm_page_wire_count_warning = 0;
 314 unsigned int    vm_page_gobble_count_warning = 0;
 315
 316 unsigned int    vm_page_purgeable_count = 0; /* # of pages purgeable now */
 317 unsigned int    vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
 318 uint64_t        vm_page_purged_count = 0;    /* total count of purged pages */
 319
 320 #if DEVELOPMENT || DEBUG
 321 unsigned int    vm_page_speculative_recreated = 0;
 322 unsigned int    vm_page_speculative_created = 0;
 323 unsigned int    vm_page_speculative_used = 0;
 324 #endif
 325
 326 queue_head_t    vm_page_queue_cleaned;
 327
 328 unsigned int    vm_page_cleaned_count = 0;
 329 unsigned int    vm_pageout_enqueued_cleaned = 0;
 330
 331 uint64_t        max_valid_dma_address = 0xffffffffffffffffULL;
 332 ppnum_t         max_valid_low_ppnum = 0xffffffff;
 333
 334
 335 /*
 336  *      Several page replacement parameters are also
 337  *      shared with this module, so that page allocation
 338  *      (done here in vm_page_alloc) can trigger the
 339  *      pageout daemon.
 340  */
 341 unsigned int    vm_page_free_target = 0;
 342 unsigned int    vm_page_free_min = 0;
 343 unsigned int    vm_page_throttle_limit = 0;
 344 uint32_t        vm_page_creation_throttle = 0;
 345 unsigned int    vm_page_inactive_target = 0;
 346 unsigned int   vm_page_anonymous_min = 0;
 347 unsigned int    vm_page_inactive_min = 0;
 348 unsigned int    vm_page_free_reserved = 0;
 349 unsigned int    vm_page_throttle_count = 0;
 350
 351
 352 /*
 353  *      The VM system has a couple of heuristics for deciding
 354  *      that pages are "uninteresting" and should be placed
 355  *      on the inactive queue as likely candidates for replacement.
 356  *      These variables let the heuristics be controlled at run-time
 357  *      to make experimentation easier.
 358  */
 359
 360 boolean_t vm_page_deactivate_hint = TRUE;
 361
 362 struct vm_page_stats_reusable vm_page_stats_reusable;
 363
 364 /*
 365  *      vm_set_page_size:
 366  *
 367  *      Sets the page size, perhaps based upon the memory
 368  *      size.  Must be called before any use of page-size
 369  *      dependent functions.
 370  *
 371  *      Sets page_shift and page_mask from page_size.
 372  */
 373 void
 374 vm_set_page_size(void)
 375 {
 376         page_mask = page_size - 1;
 377
 378         if ((page_mask & page_size) != 0)
 379                 panic("vm_set_page_size: page size not a power of two");
 380
 381         for (page_shift = 0; ; page_shift++)
 382                 if ((1U << page_shift) == page_size)
 383                         break;
 384 }
 385
 386
 387 /* Called once during statup, once the cache geometry is known.
 388  */
 389 static void
 390 vm_page_set_colors( void )
 391 {
 392         unsigned int    n, override;
 393
 394         if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )               /* colors specified as a boot-arg? */
 395                 n = override;
 396         else if ( vm_cache_geometry_colors )                    /* do we know what the cache geometry is? */
 397                 n = vm_cache_geometry_colors;
 398         else    n = DEFAULT_COLORS;                             /* use default if all else fails */
 399
 400         if ( n == 0 )
 401                 n = 1;
 402         if ( n > MAX_COLORS )
 403                 n = MAX_COLORS;
 404
 405         /* the count must be a power of 2  */
 406         if ( ( n & (n - 1)) != 0  )
 407                 panic("vm_page_set_colors");
 408
 409         vm_colors = n;
 410         vm_color_mask = n - 1;
 411 }
 412
 413
 414 lck_grp_t               vm_page_lck_grp_free;
 415 lck_grp_t               vm_page_lck_grp_queue;
 416 lck_grp_t               vm_page_lck_grp_local;
 417 lck_grp_t               vm_page_lck_grp_purge;
 418 lck_grp_t               vm_page_lck_grp_alloc;
 419 lck_grp_t               vm_page_lck_grp_bucket;
 420 lck_grp_attr_t          vm_page_lck_grp_attr;
 421 lck_attr_t              vm_page_lck_attr;
 422
 423
 424 __private_extern__ void
 425 vm_page_init_lck_grp(void)
 426 {
 427         /*
 428          * initialze the vm_page lock world
 429          */
 430         lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
 431         lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
 432         lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
 433         lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
 434         lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
 435         lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
 436         lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
 437         lck_attr_setdefault(&vm_page_lck_attr);
 438         lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
 439 }
 440
 441 void
 442 vm_page_init_local_q()
 443 {
 444         unsigned int            num_cpus;
 445         unsigned int            i;
 446         struct vplq             *t_local_q;
 447
 448         num_cpus = ml_get_max_cpus();
 449
 450         /*
 451          * no point in this for a uni-processor system
 452          */
 453         if (num_cpus >= 2) {
 454                 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
 455
 456                 for (i = 0; i < num_cpus; i++) {
 457                         struct vpl      *lq;
 458
 459                         lq = &t_local_q[i].vpl_un.vpl;
 460                         VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
 461                         queue_init(&lq->vpl_queue);
 462                         lq->vpl_count = 0;
 463                 }
 464                 vm_page_local_q_count = num_cpus;
 465
 466                 vm_page_local_q = (struct vplq *)t_local_q;
 467         }
 468 }
 469
 470
 471 /*
 472  *      vm_page_bootstrap:
 473  *
 474  *      Initializes the resident memory module.
 475  *
 476  *      Allocates memory for the page cells, and
 477  *      for the object/offset-to-page hash table headers.
 478  *      Each page cell is initialized and placed on the free list.
 479  *      Returns the range of available kernel virtual memory.
 480  */
 481
 482 void
 483 vm_page_bootstrap(
 484         vm_offset_t             *startp,
 485         vm_offset_t             *endp)
 486 {
 487         register vm_page_t      m;
 488         unsigned int            i;
 489         unsigned int            log1;
 490         unsigned int            log2;
 491         unsigned int            size;
 492
 493         /*
 494          *      Initialize the vm_page template.
 495          */
 496
 497         m = &vm_page_template;
 498         bzero(m, sizeof (*m));
 499
 500         m->pageq.next = NULL;
 501         m->pageq.prev = NULL;
 502         m->listq.next = NULL;
 503         m->listq.prev = NULL;
 504         m->next = VM_PAGE_NULL;
 505
 506         m->object = VM_OBJECT_NULL;             /* reset later */
 507         m->offset = (vm_object_offset_t) -1;    /* reset later */
 508
 509         m->wire_count = 0;
 510         m->local = FALSE;
 511         m->inactive = FALSE;
 512         m->active = FALSE;
 513         m->pageout_queue = FALSE;
 514         m->speculative = FALSE;
 515         m->laundry = FALSE;
 516         m->free = FALSE;
 517         m->reference = FALSE;
 518         m->gobbled = FALSE;
 519         m->private = FALSE;
 520         m->throttled = FALSE;
 521         m->__unused_pageq_bits = 0;
 522
 523         m->phys_page = 0;               /* reset later */
 524
 525         m->busy = TRUE;
 526         m->wanted = FALSE;
 527         m->tabled = FALSE;
 528         m->fictitious = FALSE;
 529         m->pmapped = FALSE;
 530         m->wpmapped = FALSE;
 531         m->pageout = FALSE;
 532         m->absent = FALSE;
 533         m->error = FALSE;
 534         m->dirty = FALSE;
 535         m->cleaning = FALSE;
 536         m->precious = FALSE;
 537         m->clustered = FALSE;
 538         m->overwriting = FALSE;
 539         m->restart = FALSE;
 540         m->unusual = FALSE;
 541         m->encrypted = FALSE;
 542         m->encrypted_cleaning = FALSE;
 543         m->cs_validated = FALSE;
 544         m->cs_tainted = FALSE;
 545         m->no_cache = FALSE;
 546         m->reusable = FALSE;
 547         m->slid = FALSE;
 548         m->was_dirty = FALSE;
 549         m->__unused_object_bits = 0;
 550
 551
 552         /*
 553          *      Initialize the page queues.
 554          */
 555         vm_page_init_lck_grp();
 556
 557         lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
 558         lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
 559         lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
 560
 561         for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
 562                 int group;
 563
 564                 purgeable_queues[i].token_q_head = 0;
 565                 purgeable_queues[i].token_q_tail = 0;
 566                 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
 567                         queue_init(&purgeable_queues[i].objq[group]);
 568
 569                 purgeable_queues[i].type = i;
 570                 purgeable_queues[i].new_pages = 0;
 571 #if MACH_ASSERT
 572                 purgeable_queues[i].debug_count_tokens = 0;
 573                 purgeable_queues[i].debug_count_objects = 0;
 574 #endif
 575         };
 576
 577         for (i = 0; i < MAX_COLORS; i++ )
 578                 queue_init(&vm_page_queue_free[i]);
 579
 580         queue_init(&vm_lopage_queue_free);
 581         queue_init(&vm_page_queue_active);
 582         queue_init(&vm_page_queue_inactive);
 583         queue_init(&vm_page_queue_cleaned);
 584         queue_init(&vm_page_queue_throttled);
 585         queue_init(&vm_page_queue_anonymous);
 586
 587         for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
 588                 queue_init(&vm_page_queue_speculative[i].age_q);
 589
 590                 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
 591                 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
 592         }
 593         vm_page_free_wanted = 0;
 594         vm_page_free_wanted_privileged = 0;
 595
 596         vm_page_set_colors();
 597
 598
 599         /*
 600          *      Steal memory for the map and zone subsystems.
 601          */
 602         zone_steal_memory();
 603         vm_map_steal_memory();
 604
 605         /*
 606          *      Allocate (and initialize) the virtual-to-physical
 607          *      table hash buckets.
 608          *
 609          *      The number of buckets should be a power of two to
 610          *      get a good hash function.  The following computation
 611          *      chooses the first power of two that is greater
 612          *      than the number of physical pages in the system.
 613          */
 614
 615         if (vm_page_bucket_count == 0) {
 616                 unsigned int npages = pmap_free_pages();
 617
 618                 vm_page_bucket_count = 1;
 619                 while (vm_page_bucket_count < npages)
 620                         vm_page_bucket_count <<= 1;
 621         }
 622         vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
 623
 624         vm_page_hash_mask = vm_page_bucket_count - 1;
 625
 626         /*
 627          *      Calculate object shift value for hashing algorithm:
 628          *              O = log2(sizeof(struct vm_object))
 629          *              B = log2(vm_page_bucket_count)
 630          *              hash shifts the object left by
 631          *              B/2 - O
 632          */
 633         size = vm_page_bucket_count;
 634         for (log1 = 0; size > 1; log1++)
 635                 size /= 2;
 636         size = sizeof(struct vm_object);
 637         for (log2 = 0; size > 1; log2++)
 638                 size /= 2;
 639         vm_page_hash_shift = log1/2 - log2 + 1;
 640
 641         vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);           /* Get (ceiling of sqrt of table size) */
 642         vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);          /* Get (ceiling of quadroot of table size) */
 643         vm_page_bucket_hash |= 1;                                                       /* Set bit and add 1 - always must be 1 to insure unique series */
 644
 645         if (vm_page_hash_mask & vm_page_bucket_count)
 646                 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
 647
 648         vm_page_buckets = (vm_page_bucket_t *)
 649                 pmap_steal_memory(vm_page_bucket_count *
 650                                   sizeof(vm_page_bucket_t));
 651
 652         vm_page_bucket_locks = (lck_spin_t *)
 653                 pmap_steal_memory(vm_page_bucket_lock_count *
 654                                   sizeof(lck_spin_t));
 655
 656         for (i = 0; i < vm_page_bucket_count; i++) {
 657                 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
 658
 659                 bucket->pages = VM_PAGE_NULL;
 660 #if     MACH_PAGE_HASH_STATS
 661                 bucket->cur_count = 0;
 662                 bucket->hi_count = 0;
 663 #endif /* MACH_PAGE_HASH_STATS */
 664         }
 665
 666         for (i = 0; i < vm_page_bucket_lock_count; i++)
 667                 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
 668
 669         /*
 670          *      Machine-dependent code allocates the resident page table.
 671          *      It uses vm_page_init to initialize the page frames.
 672          *      The code also returns to us the virtual space available
 673          *      to the kernel.  We don't trust the pmap module
 674          *      to get the alignment right.
 675          */
 676
 677         pmap_startup(&virtual_space_start, &virtual_space_end);
 678         virtual_space_start = round_page(virtual_space_start);
 679         virtual_space_end = trunc_page(virtual_space_end);
 680
 681         *startp = virtual_space_start;
 682         *endp = virtual_space_end;
 683
 684         /*
 685          *      Compute the initial "wire" count.
 686          *      Up until now, the pages which have been set aside are not under
 687          *      the VM system's control, so although they aren't explicitly
 688          *      wired, they nonetheless can't be moved. At this moment,
 689          *      all VM managed pages are "free", courtesy of pmap_startup.
 690          */
 691         assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
 692         vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;     /* initial value */
 693         vm_page_wire_count_initial = vm_page_wire_count;
 694         vm_page_free_count_minimum = vm_page_free_count;
 695
 696         printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
 697                vm_page_free_count, vm_page_wire_count);
 698
 699         simple_lock_init(&vm_paging_lock, 0);
 700 }
 701
 702 #ifndef MACHINE_PAGES
 703 /*
 704  *      We implement pmap_steal_memory and pmap_startup with the help
 705  *      of two simpler functions, pmap_virtual_space and pmap_next_page.
 706  */
 707
 708 void *
 709 pmap_steal_memory(
 710         vm_size_t size)
 711 {
 712         vm_offset_t addr, vaddr;
 713         ppnum_t phys_page;
 714
 715         /*
 716          *      We round the size to a round multiple.
 717          */
 718
 719         size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
 720
 721         /*
 722          *      If this is the first call to pmap_steal_memory,
 723          *      we have to initialize ourself.
 724          */
 725
 726         if (virtual_space_start == virtual_space_end) {
 727                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
 728
 729                 /*
 730                  *      The initial values must be aligned properly, and
 731                  *      we don't trust the pmap module to do it right.
 732                  */
 733
 734                 virtual_space_start = round_page(virtual_space_start);
 735                 virtual_space_end = trunc_page(virtual_space_end);
 736         }
 737
 738         /*
 739          *      Allocate virtual memory for this request.
 740          */
 741
 742         addr = virtual_space_start;
 743         virtual_space_start += size;
 744
 745         //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
 746
 747         /*
 748          *      Allocate and map physical pages to back new virtual pages.
 749          */
 750
 751         for (vaddr = round_page(addr);
 752              vaddr < addr + size;
 753              vaddr += PAGE_SIZE) {
 754
 755                 if (!pmap_next_page_hi(&phys_page))
 756                         panic("pmap_steal_memory");
 757
 758                 /*
 759                  *      XXX Logically, these mappings should be wired,
 760                  *      but some pmap modules barf if they are.
 761                  */
 762 #if defined(__LP64__)
 763                 pmap_pre_expand(kernel_pmap, vaddr);
 764 #endif
 765
 766                 pmap_enter(kernel_pmap, vaddr, phys_page,
 767                            VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
 768                                 VM_WIMG_USE_DEFAULT, FALSE);
 769                 /*
 770                  * Account for newly stolen memory
 771                  */
 772                 vm_page_wire_count++;
 773
 774         }
 775
 776         return (void *) addr;
 777 }
 778
 779 void
 780 pmap_startup(
 781         vm_offset_t *startp,
 782         vm_offset_t *endp)
 783 {
 784         unsigned int i, npages, pages_initialized, fill, fillval;
 785         ppnum_t         phys_page;
 786         addr64_t        tmpaddr;
 787
 788         /*
 789          *      We calculate how many page frames we will have
 790          *      and then allocate the page structures in one chunk.
 791          */
 792
 793         tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;    /* Get the amount of memory left */
 794         tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);  /* Account for any slop */
 795         npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));   /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
 796
 797         vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
 798
 799         /*
 800          *      Initialize the page frames.
 801          */
 802         for (i = 0, pages_initialized = 0; i < npages; i++) {
 803                 if (!pmap_next_page(&phys_page))
 804                         break;
 805                 if (pages_initialized == 0 || phys_page < vm_page_lowest)
 806                         vm_page_lowest = phys_page;
 807
 808                 vm_page_init(&vm_pages[i], phys_page, FALSE);
 809                 vm_page_pages++;
 810                 pages_initialized++;
 811         }
 812         vm_pages_count = pages_initialized;
 813
 814         /*
 815          * Check if we want to initialize pages to a known value
 816          */
 817         fill = 0;                                                               /* Assume no fill */
 818         if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;                   /* Set fill */
 819 #if     DEBUG
 820         /* This slows down booting the DEBUG kernel, particularly on
 821          * large memory systems, but is worthwhile in deterministically
 822          * trapping uninitialized memory usage.
 823          */
 824         if (fill == 0) {
 825                 fill = 1;
 826                 fillval = 0xDEB8F177;
 827         }
 828 #endif
 829         if (fill)
 830                 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
 831         // -debug code remove
 832         if (2 == vm_himemory_mode) {
 833                 // free low -> high so high is preferred
 834                 for (i = 1; i <= pages_initialized; i++) {
 835                         if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);          /* Fill the page with a know value if requested at boot */
 836                         vm_page_release(&vm_pages[i - 1]);
 837                 }
 838         }
 839         else
 840         // debug code remove-
 841
 842         /*
 843          * Release pages in reverse order so that physical pages
 844          * initially get allocated in ascending addresses. This keeps
 845          * the devices (which must address physical memory) happy if
 846          * they require several consecutive pages.
 847          */
 848         for (i = pages_initialized; i > 0; i--) {
 849                 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);          /* Fill the page with a know value if requested at boot */
 850                 vm_page_release(&vm_pages[i - 1]);
 851         }
 852
 853 #if 0
 854         {
 855                 vm_page_t xx, xxo, xxl;
 856                 int i, j, k, l;
 857
 858                 j = 0;                                                                                                  /* (BRINGUP) */
 859                 xxl = 0;
 860
 861                 for( i = 0; i < vm_colors; i++ ) {
 862                         queue_iterate(&vm_page_queue_free[i],
 863                                       xx,
 864                                       vm_page_t,
 865                                       pageq) {  /* BRINGUP */
 866                                 j++;                                                                                            /* (BRINGUP) */
 867                                 if(j > vm_page_free_count) {                                            /* (BRINGUP) */
 868                                         panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
 869                                 }
 870
 871                                 l = vm_page_free_count - j;                                                     /* (BRINGUP) */
 872                                 k = 0;                                                                                          /* (BRINGUP) */
 873
 874                                 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
 875
 876                                 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {       /* (BRINGUP) */
 877                                         k++;
 878                                         if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
 879                                         if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {     /* (BRINGUP) */
 880                                                 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
 881                                         }
 882                                 }
 883
 884                                 xxl = xx;
 885                         }
 886                 }
 887
 888                 if(j != vm_page_free_count) {                                           /* (BRINGUP) */
 889                         panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
 890                 }
 891         }
 892 #endif
 893
 894
 895         /*
 896          *      We have to re-align virtual_space_start,
 897          *      because pmap_steal_memory has been using it.
 898          */
 899
 900         virtual_space_start = round_page(virtual_space_start);
 901
 902         *startp = virtual_space_start;
 903         *endp = virtual_space_end;
 904 }
 905 #endif  /* MACHINE_PAGES */
 906
 907 /*
 908  *      Routine:        vm_page_module_init
 909  *      Purpose:
 910  *              Second initialization pass, to be done after
 911  *              the basic VM system is ready.
 912  */
 913 void
 914 vm_page_module_init(void)
 915 {
 916         vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
 917                              0, PAGE_SIZE, "vm pages");
 918
 919 #if     ZONE_DEBUG
 920         zone_debug_disable(vm_page_zone);
 921 #endif  /* ZONE_DEBUG */
 922
 923         zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
 924         zone_change(vm_page_zone, Z_EXPAND, FALSE);
 925         zone_change(vm_page_zone, Z_EXHAUST, TRUE);
 926         zone_change(vm_page_zone, Z_FOREIGN, TRUE);
 927         zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
 928         /*
 929          * Adjust zone statistics to account for the real pages allocated
 930          * in vm_page_create(). [Q: is this really what we want?]
 931          */
 932         vm_page_zone->count += vm_page_pages;
 933         vm_page_zone->sum_count += vm_page_pages;
 934         vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
 935 }
 936
 937 /*
 938  *      Routine:        vm_page_create
 939  *      Purpose:
 940  *              After the VM system is up, machine-dependent code
 941  *              may stumble across more physical memory.  For example,
 942  *              memory that it was reserving for a frame buffer.
 943  *              vm_page_create turns this memory into available pages.
 944  */
 945
 946 void
 947 vm_page_create(
 948         ppnum_t start,
 949         ppnum_t end)
 950 {
 951         ppnum_t         phys_page;
 952         vm_page_t       m;
 953
 954         for (phys_page = start;
 955              phys_page < end;
 956              phys_page++) {
 957                 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
 958                         == VM_PAGE_NULL)
 959                         vm_page_more_fictitious();
 960
 961                 m->fictitious = FALSE;
 962                 pmap_clear_noencrypt(phys_page);
 963
 964                 vm_page_pages++;
 965                 vm_page_release(m);
 966         }
 967 }
 968
 969 /*
 970  *      vm_page_hash:
 971  *
 972  *      Distributes the object/offset key pair among hash buckets.
 973  *
 974  *      NOTE:   The bucket count must be a power of 2
 975  */
 976 #define vm_page_hash(object, offset) (\
 977         ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
 978          & vm_page_hash_mask)
 979
 980
 981 /*
 982  *      vm_page_insert:         [ internal use only ]
 983  *
 984  *      Inserts the given mem entry into the object/object-page
 985  *      table and object list.
 986  *
 987  *      The object must be locked.
 988  */
 989 void
 990 vm_page_insert(
 991         vm_page_t               mem,
 992         vm_object_t             object,
 993         vm_object_offset_t      offset)
 994 {
 995         vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
 996 }
 997
 998 void
 999 vm_page_insert_internal(
1000         vm_page_t               mem,
1001         vm_object_t             object,
1002         vm_object_offset_t      offset,
1003         boolean_t               queues_lock_held,
1004         boolean_t               insert_in_hash,
1005         boolean_t               batch_pmap_op)
1006 {
1007         vm_page_bucket_t *bucket;
1008         lck_spin_t      *bucket_lock;
1009         int     hash_id;
1010
1011         XPR(XPR_VM_PAGE,
1012                 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1013                 object, offset, mem, 0,0);
1014 #if 0
1015         /*
1016          * we may not hold the page queue lock
1017          * so this check isn't safe to make
1018          */
1019         VM_PAGE_CHECK(mem);
1020 #endif
1021
1022         if (object == vm_submap_object) {
1023                 /* the vm_submap_object is only a placeholder for submaps */
1024                 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1025         }
1026
1027         vm_object_lock_assert_exclusive(object);
1028 #if DEBUG
1029         lck_mtx_assert(&vm_page_queue_lock,
1030                        queues_lock_held ? LCK_MTX_ASSERT_OWNED
1031                                         : LCK_MTX_ASSERT_NOTOWNED);
1032 #endif  /* DEBUG */
1033
1034         if (insert_in_hash == TRUE) {
1035 #if DEBUG
1036                 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1037                         panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1038                               "already in (obj=%p,off=0x%llx)",
1039                               mem, object, offset, mem->object, mem->offset);
1040 #endif
1041                 assert(!object->internal || offset < object->vo_size);
1042
1043                 /* only insert "pageout" pages into "pageout" objects,
1044                  * and normal pages into normal objects */
1045                 assert(object->pageout == mem->pageout);
1046
1047                 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1048
1049                 /*
1050                  *      Record the object/offset pair in this page
1051                  */
1052
1053                 mem->object = object;
1054                 mem->offset = offset;
1055
1056                 /*
1057                  *      Insert it into the object_object/offset hash table
1058                  */
1059                 hash_id = vm_page_hash(object, offset);
1060                 bucket = &vm_page_buckets[hash_id];
1061                 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1062
1063                 lck_spin_lock(bucket_lock);
1064
1065                 mem->next = bucket->pages;
1066                 bucket->pages = mem;
1067 #if     MACH_PAGE_HASH_STATS
1068                 if (++bucket->cur_count > bucket->hi_count)
1069                         bucket->hi_count = bucket->cur_count;
1070 #endif /* MACH_PAGE_HASH_STATS */
1071
1072                 lck_spin_unlock(bucket_lock);
1073         }
1074
1075         {
1076                 unsigned int    cache_attr;
1077
1078                 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1079
1080                 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1081                         PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1082                 }
1083         }
1084         /*
1085          *      Now link into the object's list of backed pages.
1086          */
1087
1088         VM_PAGE_INSERT(mem, object);
1089         mem->tabled = TRUE;
1090
1091         /*
1092          *      Show that the object has one more resident page.
1093          */
1094
1095         object->resident_page_count++;
1096         if (VM_PAGE_WIRED(mem)) {
1097                 object->wired_page_count++;
1098         }
1099         assert(object->resident_page_count >= object->wired_page_count);
1100
1101         assert(!mem->reusable);
1102
1103         if (object->purgable == VM_PURGABLE_VOLATILE) {
1104                 if (VM_PAGE_WIRED(mem)) {
1105                         OSAddAtomic(1, &vm_page_purgeable_wired_count);
1106                 } else {
1107                         OSAddAtomic(1, &vm_page_purgeable_count);
1108                 }
1109         } else if (object->purgable == VM_PURGABLE_EMPTY &&
1110                    mem->throttled) {
1111                 /*
1112                  * This page belongs to a purged VM object but hasn't
1113                  * been purged (because it was "busy").
1114                  * It's in the "throttled" queue and hence not
1115                  * visible to vm_pageout_scan().  Move it to a pageable
1116                  * queue, so that it can eventually be reclaimed, instead
1117                  * of lingering in the "empty" object.
1118                  */
1119                 if (queues_lock_held == FALSE)
1120                         vm_page_lockspin_queues();
1121                 vm_page_deactivate(mem);
1122                 if (queues_lock_held == FALSE)
1123                         vm_page_unlock_queues();
1124         }
1125 }
1126
1127 /*
1128  *      vm_page_replace:
1129  *
1130  *      Exactly like vm_page_insert, except that we first
1131  *      remove any existing page at the given offset in object.
1132  *
1133  *      The object must be locked.
1134  */
1135 void
1136 vm_page_replace(
1137         register vm_page_t              mem,
1138         register vm_object_t            object,
1139         register vm_object_offset_t     offset)
1140 {
1141         vm_page_bucket_t *bucket;
1142         vm_page_t        found_m = VM_PAGE_NULL;
1143         lck_spin_t      *bucket_lock;
1144         int             hash_id;
1145
1146 #if 0
1147         /*
1148          * we don't hold the page queue lock
1149          * so this check isn't safe to make
1150          */
1151         VM_PAGE_CHECK(mem);
1152 #endif
1153         vm_object_lock_assert_exclusive(object);
1154 #if DEBUG
1155         if (mem->tabled || mem->object != VM_OBJECT_NULL)
1156                 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1157                       "already in (obj=%p,off=0x%llx)",
1158                       mem, object, offset, mem->object, mem->offset);
1159         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1160 #endif
1161         /*
1162          *      Record the object/offset pair in this page
1163          */
1164
1165         mem->object = object;
1166         mem->offset = offset;
1167
1168         /*
1169          *      Insert it into the object_object/offset hash table,
1170          *      replacing any page that might have been there.
1171          */
1172
1173         hash_id = vm_page_hash(object, offset);
1174         bucket = &vm_page_buckets[hash_id];
1175         bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1176
1177         lck_spin_lock(bucket_lock);
1178
1179         if (bucket->pages) {
1180                 vm_page_t *mp = &bucket->pages;
1181                 vm_page_t m = *mp;
1182
1183                 do {
1184                         if (m->object == object && m->offset == offset) {
1185                                 /*
1186                                  * Remove old page from hash list
1187                                  */
1188                                 *mp = m->next;
1189
1190                                 found_m = m;
1191                                 break;
1192                         }
1193                         mp = &m->next;
1194                 } while ((m = *mp));
1195
1196                 mem->next = bucket->pages;
1197         } else {
1198                 mem->next = VM_PAGE_NULL;
1199         }
1200         /*
1201          * insert new page at head of hash list
1202          */
1203         bucket->pages = mem;
1204
1205         lck_spin_unlock(bucket_lock);
1206
1207         if (found_m) {
1208                 /*
1209                  * there was already a page at the specified
1210                  * offset for this object... remove it from
1211                  * the object and free it back to the free list
1212                  */
1213                 vm_page_free_unlocked(found_m, FALSE);
1214         }
1215         vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1216 }
1217
1218 /*
1219  *      vm_page_remove:         [ internal use only ]
1220  *
1221  *      Removes the given mem entry from the object/offset-page
1222  *      table and the object page list.
1223  *
1224  *      The object must be locked.
1225  */
1226
1227 void
1228 vm_page_remove(
1229         vm_page_t       mem,
1230         boolean_t       remove_from_hash)
1231 {
1232         vm_page_bucket_t *bucket;
1233         vm_page_t       this;
1234         lck_spin_t      *bucket_lock;
1235         int             hash_id;
1236
1237         XPR(XPR_VM_PAGE,
1238                 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1239                 mem->object, mem->offset,
1240                 mem, 0,0);
1241
1242         vm_object_lock_assert_exclusive(mem->object);
1243         assert(mem->tabled);
1244         assert(!mem->cleaning);
1245         assert(!mem->laundry);
1246 #if 0
1247         /*
1248          * we don't hold the page queue lock
1249          * so this check isn't safe to make
1250          */
1251         VM_PAGE_CHECK(mem);
1252 #endif
1253         if (remove_from_hash == TRUE) {
1254                 /*
1255                  *      Remove from the object_object/offset hash table
1256                  */
1257                 hash_id = vm_page_hash(mem->object, mem->offset);
1258                 bucket = &vm_page_buckets[hash_id];
1259                 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1260
1261                 lck_spin_lock(bucket_lock);
1262
1263                 if ((this = bucket->pages) == mem) {
1264                         /* optimize for common case */
1265
1266                         bucket->pages = mem->next;
1267                 } else {
1268                         vm_page_t       *prev;
1269
1270                         for (prev = &this->next;
1271                              (this = *prev) != mem;
1272                              prev = &this->next)
1273                                 continue;
1274                         *prev = this->next;
1275                 }
1276 #if     MACH_PAGE_HASH_STATS
1277                 bucket->cur_count--;
1278 #endif /* MACH_PAGE_HASH_STATS */
1279
1280                 lck_spin_unlock(bucket_lock);
1281         }
1282         /*
1283          *      Now remove from the object's list of backed pages.
1284          */
1285
1286         VM_PAGE_REMOVE(mem);
1287
1288         /*
1289          *      And show that the object has one fewer resident
1290          *      page.
1291          */
1292
1293         assert(mem->object->resident_page_count > 0);
1294         mem->object->resident_page_count--;
1295
1296         if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1297                 if (mem->object->resident_page_count == 0)
1298                         vm_object_cache_remove(mem->object);
1299         }
1300
1301         if (VM_PAGE_WIRED(mem)) {
1302                 assert(mem->object->wired_page_count > 0);
1303                 mem->object->wired_page_count--;
1304         }
1305         assert(mem->object->resident_page_count >=
1306                mem->object->wired_page_count);
1307         if (mem->reusable) {
1308                 assert(mem->object->reusable_page_count > 0);
1309                 mem->object->reusable_page_count--;
1310                 assert(mem->object->reusable_page_count <=
1311                        mem->object->resident_page_count);
1312                 mem->reusable = FALSE;
1313                 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1314                 vm_page_stats_reusable.reused_remove++;
1315         } else if (mem->object->all_reusable) {
1316                 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1317                 vm_page_stats_reusable.reused_remove++;
1318         }
1319
1320         if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1321                 if (VM_PAGE_WIRED(mem)) {
1322                         assert(vm_page_purgeable_wired_count > 0);
1323                         OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1324                 } else {
1325                         assert(vm_page_purgeable_count > 0);
1326                         OSAddAtomic(-1, &vm_page_purgeable_count);
1327                 }
1328         }
1329         if (mem->object->set_cache_attr == TRUE)
1330                 pmap_set_cache_attributes(mem->phys_page, 0);
1331
1332         mem->tabled = FALSE;
1333         mem->object = VM_OBJECT_NULL;
1334         mem->offset = (vm_object_offset_t) -1;
1335 }
1336
1337
1338 /*
1339  *      vm_page_lookup:
1340  *
1341  *      Returns the page associated with the object/offset
1342  *      pair specified; if none is found, VM_PAGE_NULL is returned.
1343  *
1344  *      The object must be locked.  No side effects.
1345  */
1346
1347 unsigned long vm_page_lookup_hint = 0;
1348 unsigned long vm_page_lookup_hint_next = 0;
1349 unsigned long vm_page_lookup_hint_prev = 0;
1350 unsigned long vm_page_lookup_hint_miss = 0;
1351 unsigned long vm_page_lookup_bucket_NULL = 0;
1352 unsigned long vm_page_lookup_miss = 0;
1353
1354
1355 vm_page_t
1356 vm_page_lookup(
1357         vm_object_t             object,
1358         vm_object_offset_t      offset)
1359 {
1360         vm_page_t       mem;
1361         vm_page_bucket_t *bucket;
1362         queue_entry_t   qe;
1363         lck_spin_t      *bucket_lock;
1364         int             hash_id;
1365
1366         vm_object_lock_assert_held(object);
1367         mem = object->memq_hint;
1368
1369         if (mem != VM_PAGE_NULL) {
1370                 assert(mem->object == object);
1371
1372                 if (mem->offset == offset) {
1373                         vm_page_lookup_hint++;
1374                         return mem;
1375                 }
1376                 qe = queue_next(&mem->listq);
1377
1378                 if (! queue_end(&object->memq, qe)) {
1379                         vm_page_t       next_page;
1380
1381                         next_page = (vm_page_t) qe;
1382                         assert(next_page->object == object);
1383
1384                         if (next_page->offset == offset) {
1385                                 vm_page_lookup_hint_next++;
1386                                 object->memq_hint = next_page; /* new hint */
1387                                 return next_page;
1388                         }
1389                 }
1390                 qe = queue_prev(&mem->listq);
1391
1392                 if (! queue_end(&object->memq, qe)) {
1393                         vm_page_t prev_page;
1394
1395                         prev_page = (vm_page_t) qe;
1396                         assert(prev_page->object == object);
1397
1398                         if (prev_page->offset == offset) {
1399                                 vm_page_lookup_hint_prev++;
1400                                 object->memq_hint = prev_page; /* new hint */
1401                                 return prev_page;
1402                         }
1403                 }
1404         }
1405         /*
1406          * Search the hash table for this object/offset pair
1407          */
1408         hash_id = vm_page_hash(object, offset);
1409         bucket = &vm_page_buckets[hash_id];
1410
1411         /*
1412          * since we hold the object lock, we are guaranteed that no
1413          * new pages can be inserted into this object... this in turn
1414          * guarantess that the page we're looking for can't exist
1415          * if the bucket it hashes to is currently NULL even when looked
1416          * at outside the scope of the hash bucket lock... this is a
1417          * really cheap optimiztion to avoid taking the lock
1418          */
1419         if (bucket->pages == VM_PAGE_NULL) {
1420                 vm_page_lookup_bucket_NULL++;
1421
1422                 return (VM_PAGE_NULL);
1423         }
1424         bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1425
1426         lck_spin_lock(bucket_lock);
1427
1428         for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1429 #if 0
1430                 /*
1431                  * we don't hold the page queue lock
1432                  * so this check isn't safe to make
1433                  */
1434                 VM_PAGE_CHECK(mem);
1435 #endif
1436                 if ((mem->object == object) && (mem->offset == offset))
1437                         break;
1438         }
1439         lck_spin_unlock(bucket_lock);
1440
1441         if (mem != VM_PAGE_NULL) {
1442                 if (object->memq_hint != VM_PAGE_NULL) {
1443                         vm_page_lookup_hint_miss++;
1444                 }
1445                 assert(mem->object == object);
1446                 object->memq_hint = mem;
1447         } else
1448                 vm_page_lookup_miss++;
1449
1450         return(mem);
1451 }
1452
1453
1454 /*
1455  *      vm_page_rename:
1456  *
1457  *      Move the given memory entry from its
1458  *      current object to the specified target object/offset.
1459  *
1460  *      The object must be locked.
1461  */
1462 void
1463 vm_page_rename(
1464         register vm_page_t              mem,
1465         register vm_object_t            new_object,
1466         vm_object_offset_t              new_offset,
1467         boolean_t                       encrypted_ok)
1468 {
1469         assert(mem->object != new_object);
1470
1471         /*
1472          * ENCRYPTED SWAP:
1473          * The encryption key is based on the page's memory object
1474          * (aka "pager") and paging offset.  Moving the page to
1475          * another VM object changes its "pager" and "paging_offset"
1476          * so it has to be decrypted first, or we would lose the key.
1477          *
1478          * One exception is VM object collapsing, where we transfer pages
1479          * from one backing object to its parent object.  This operation also
1480          * transfers the paging information, so the <pager,paging_offset> info
1481          * should remain consistent.  The caller (vm_object_do_collapse())
1482          * sets "encrypted_ok" in this case.
1483          */
1484         if (!encrypted_ok && mem->encrypted) {
1485                 panic("vm_page_rename: page %p is encrypted\n", mem);
1486         }
1487
1488         XPR(XPR_VM_PAGE,
1489                 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1490                 new_object, new_offset,
1491                 mem, 0,0);
1492
1493         /*
1494          *      Changes to mem->object require the page lock because
1495          *      the pageout daemon uses that lock to get the object.
1496          */
1497         vm_page_lockspin_queues();
1498
1499         vm_page_remove(mem, TRUE);
1500         vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1501
1502         vm_page_unlock_queues();
1503 }
1504
1505 /*
1506  *      vm_page_init:
1507  *
1508  *      Initialize the fields in a new page.
1509  *      This takes a structure with random values and initializes it
1510  *      so that it can be given to vm_page_release or vm_page_insert.
1511  */
1512 void
1513 vm_page_init(
1514         vm_page_t       mem,
1515         ppnum_t         phys_page,
1516         boolean_t       lopage)
1517 {
1518         assert(phys_page);
1519
1520 #if     DEBUG
1521         if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1522                 if (!(pmap_valid_page(phys_page))) {
1523                         panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1524                 }
1525         }
1526 #endif
1527         *mem = vm_page_template;
1528         mem->phys_page = phys_page;
1529 #if 0
1530         /*
1531          * we're leaving this turned off for now... currently pages
1532          * come off the free list and are either immediately dirtied/referenced
1533          * due to zero-fill or COW faults, or are used to read or write files...
1534          * in the file I/O case, the UPL mechanism takes care of clearing
1535          * the state of the HW ref/mod bits in a somewhat fragile way.
1536          * Since we may change the way this works in the future (to toughen it up),
1537          * I'm leaving this as a reminder of where these bits could get cleared
1538          */
1539
1540         /*
1541          * make sure both the h/w referenced and modified bits are
1542          * clear at this point... we are especially dependent on
1543          * not finding a 'stale' h/w modified in a number of spots
1544          * once this page goes back into use
1545          */
1546         pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1547 #endif
1548         mem->lopage = lopage;
1549 }
1550
1551 /*
1552  *      vm_page_grab_fictitious:
1553  *
1554  *      Remove a fictitious page from the free list.
1555  *      Returns VM_PAGE_NULL if there are no free pages.
1556  */
1557 int     c_vm_page_grab_fictitious = 0;
1558 int     c_vm_page_grab_fictitious_failed = 0;
1559 int     c_vm_page_release_fictitious = 0;
1560 int     c_vm_page_more_fictitious = 0;
1561
1562 vm_page_t
1563 vm_page_grab_fictitious_common(
1564         ppnum_t phys_addr)
1565 {
1566         vm_page_t       m;
1567
1568         if ((m = (vm_page_t)zget(vm_page_zone))) {
1569
1570                 vm_page_init(m, phys_addr, FALSE);
1571                 m->fictitious = TRUE;
1572
1573                 c_vm_page_grab_fictitious++;
1574         } else
1575                 c_vm_page_grab_fictitious_failed++;
1576
1577         return m;
1578 }
1579
1580 vm_page_t
1581 vm_page_grab_fictitious(void)
1582 {
1583         return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1584 }
1585
1586 vm_page_t
1587 vm_page_grab_guard(void)
1588 {
1589         return vm_page_grab_fictitious_common(vm_page_guard_addr);
1590 }
1591
1592
1593 /*
1594  *      vm_page_release_fictitious:
1595  *
1596  *      Release a fictitious page to the zone pool
1597  */
1598 void
1599 vm_page_release_fictitious(
1600         vm_page_t m)
1601 {
1602         assert(!m->free);
1603         assert(m->fictitious);
1604         assert(m->phys_page == vm_page_fictitious_addr ||
1605                m->phys_page == vm_page_guard_addr);
1606
1607         c_vm_page_release_fictitious++;
1608
1609         zfree(vm_page_zone, m);
1610 }
1611
1612 /*
1613  *      vm_page_more_fictitious:
1614  *
1615  *      Add more fictitious pages to the zone.
1616  *      Allowed to block. This routine is way intimate
1617  *      with the zones code, for several reasons:
1618  *      1. we need to carve some page structures out of physical
1619  *         memory before zones work, so they _cannot_ come from
1620  *         the zone_map.
1621  *      2. the zone needs to be collectable in order to prevent
1622  *         growth without bound. These structures are used by
1623  *         the device pager (by the hundreds and thousands), as
1624  *         private pages for pageout, and as blocking pages for
1625  *         pagein. Temporary bursts in demand should not result in
1626  *         permanent allocation of a resource.
1627  *      3. To smooth allocation humps, we allocate single pages
1628  *         with kernel_memory_allocate(), and cram them into the
1629  *         zone.
1630  */
1631
1632 void vm_page_more_fictitious(void)
1633 {
1634         vm_offset_t     addr;
1635         kern_return_t   retval;
1636
1637         c_vm_page_more_fictitious++;
1638
1639         /*
1640          * Allocate a single page from the zone_map. Do not wait if no physical
1641          * pages are immediately available, and do not zero the space. We need
1642          * our own blocking lock here to prevent having multiple,
1643          * simultaneous requests from piling up on the zone_map lock. Exactly
1644          * one (of our) threads should be potentially waiting on the map lock.
1645          * If winner is not vm-privileged, then the page allocation will fail,
1646          * and it will temporarily block here in the vm_page_wait().
1647          */
1648         lck_mtx_lock(&vm_page_alloc_lock);
1649         /*
1650          * If another thread allocated space, just bail out now.
1651          */
1652         if (zone_free_count(vm_page_zone) > 5) {
1653                 /*
1654                  * The number "5" is a small number that is larger than the
1655                  * number of fictitious pages that any single caller will
1656                  * attempt to allocate. Otherwise, a thread will attempt to
1657                  * acquire a fictitious page (vm_page_grab_fictitious), fail,
1658                  * release all of the resources and locks already acquired,
1659                  * and then call this routine. This routine finds the pages
1660                  * that the caller released, so fails to allocate new space.
1661                  * The process repeats infinitely. The largest known number
1662                  * of fictitious pages required in this manner is 2. 5 is
1663                  * simply a somewhat larger number.
1664                  */
1665                 lck_mtx_unlock(&vm_page_alloc_lock);
1666                 return;
1667         }
1668
1669         retval = kernel_memory_allocate(zone_map,
1670                                         &addr, PAGE_SIZE, VM_PROT_ALL,
1671                                         KMA_KOBJECT|KMA_NOPAGEWAIT);
1672         if (retval != KERN_SUCCESS) {
1673                 /*
1674                  * No page was available. Drop the
1675                  * lock to give another thread a chance at it, and
1676                  * wait for the pageout daemon to make progress.
1677                  */
1678                 lck_mtx_unlock(&vm_page_alloc_lock);
1679                 vm_page_wait(THREAD_UNINT);
1680                 return;
1681         }
1682         zcram(vm_page_zone, addr, PAGE_SIZE);
1683
1684         lck_mtx_unlock(&vm_page_alloc_lock);
1685 }
1686
1687
1688 /*
1689  *      vm_pool_low():
1690  *
1691  *      Return true if it is not likely that a non-vm_privileged thread
1692  *      can get memory without blocking.  Advisory only, since the
1693  *      situation may change under us.
1694  */
1695 int
1696 vm_pool_low(void)
1697 {
1698         /* No locking, at worst we will fib. */
1699         return( vm_page_free_count <= vm_page_free_reserved );
1700 }
1701
1702
1703
1704 /*
1705  * this is an interface to support bring-up of drivers
1706  * on platforms with physical memory > 4G...
1707  */
1708 int             vm_himemory_mode = 0;
1709
1710
1711 /*
1712  * this interface exists to support hardware controllers
1713  * incapable of generating DMAs with more than 32 bits
1714  * of address on platforms with physical memory > 4G...
1715  */
1716 unsigned int    vm_lopages_allocated_q = 0;
1717 unsigned int    vm_lopages_allocated_cpm_success = 0;
1718 unsigned int    vm_lopages_allocated_cpm_failed = 0;
1719 queue_head_t    vm_lopage_queue_free;
1720
1721 vm_page_t
1722 vm_page_grablo(void)
1723 {
1724         vm_page_t       mem;
1725
1726         if (vm_lopage_needed == FALSE)
1727                 return (vm_page_grab());
1728
1729         lck_mtx_lock_spin(&vm_page_queue_free_lock);
1730
1731         if ( !queue_empty(&vm_lopage_queue_free)) {
1732                 queue_remove_first(&vm_lopage_queue_free,
1733                                    mem,
1734                                    vm_page_t,
1735                                    pageq);
1736                 assert(vm_lopage_free_count);
1737
1738                 vm_lopage_free_count--;
1739                 vm_lopages_allocated_q++;
1740
1741                 if (vm_lopage_free_count < vm_lopage_lowater)
1742                         vm_lopage_refill = TRUE;
1743
1744                 lck_mtx_unlock(&vm_page_queue_free_lock);
1745         } else {
1746                 lck_mtx_unlock(&vm_page_queue_free_lock);
1747
1748                 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1749
1750                         lck_mtx_lock_spin(&vm_page_queue_free_lock);
1751                         vm_lopages_allocated_cpm_failed++;
1752                         lck_mtx_unlock(&vm_page_queue_free_lock);
1753
1754                         return (VM_PAGE_NULL);
1755                 }
1756                 mem->busy = TRUE;
1757
1758                 vm_page_lockspin_queues();
1759
1760                 mem->gobbled = FALSE;
1761                 vm_page_gobble_count--;
1762                 vm_page_wire_count--;
1763
1764                 vm_lopages_allocated_cpm_success++;
1765                 vm_page_unlock_queues();
1766         }
1767         assert(mem->busy);
1768         assert(!mem->free);
1769         assert(!mem->pmapped);
1770         assert(!mem->wpmapped);
1771         assert(!pmap_is_noencrypt(mem->phys_page));
1772
1773         mem->pageq.next = NULL;
1774         mem->pageq.prev = NULL;
1775
1776         return (mem);
1777 }
1778
1779
1780 /*
1781  *      vm_page_grab:
1782  *
1783  *      first try to grab a page from the per-cpu free list...
1784  *      this must be done while pre-emption is disabled... if
1785  *      a page is available, we're done...
1786  *      if no page is available, grab the vm_page_queue_free_lock
1787  *      and see if current number of free pages would allow us
1788  *      to grab at least 1... if not, return VM_PAGE_NULL as before...
1789  *      if there are pages available, disable preemption and
1790  *      recheck the state of the per-cpu free list... we could
1791  *      have been preempted and moved to a different cpu, or
1792  *      some other thread could have re-filled it... if still
1793  *      empty, figure out how many pages we can steal from the
1794  *      global free queue and move to the per-cpu queue...
1795  *      return 1 of these pages when done... only wakeup the
1796  *      pageout_scan thread if we moved pages from the global
1797  *      list... no need for the wakeup if we've satisfied the
1798  *      request from the per-cpu queue.
1799  */
1800
1801 #define COLOR_GROUPS_TO_STEAL   4
1802
1803
1804 vm_page_t
1805 vm_page_grab( void )
1806 {
1807         vm_page_t       mem;
1808
1809
1810         disable_preemption();
1811
1812         if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1813 return_page_from_cpu_list:
1814                 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1815                 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1816                 mem->pageq.next = NULL;
1817
1818                 enable_preemption();
1819
1820                 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1821                 assert(mem->tabled == FALSE);
1822                 assert(mem->object == VM_OBJECT_NULL);
1823                 assert(!mem->laundry);
1824                 assert(!mem->free);
1825                 assert(pmap_verify_free(mem->phys_page));
1826                 assert(mem->busy);
1827                 assert(!mem->encrypted);
1828                 assert(!mem->pmapped);
1829                 assert(!mem->wpmapped);
1830                 assert(!mem->active);
1831                 assert(!mem->inactive);
1832                 assert(!mem->throttled);
1833                 assert(!mem->speculative);
1834                 assert(!pmap_is_noencrypt(mem->phys_page));
1835
1836                 return mem;
1837         }
1838         enable_preemption();
1839
1840
1841         /*
1842          *      Optionally produce warnings if the wire or gobble
1843          *      counts exceed some threshold.
1844          */
1845         if (vm_page_wire_count_warning > 0
1846             && vm_page_wire_count >= vm_page_wire_count_warning) {
1847                 printf("mk: vm_page_grab(): high wired page count of %d\n",
1848                         vm_page_wire_count);
1849                 assert(vm_page_wire_count < vm_page_wire_count_warning);
1850         }
1851         if (vm_page_gobble_count_warning > 0
1852             && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1853                 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1854                         vm_page_gobble_count);
1855                 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1856         }
1857
1858         lck_mtx_lock_spin(&vm_page_queue_free_lock);
1859
1860         /*
1861          *      Only let privileged threads (involved in pageout)
1862          *      dip into the reserved pool.
1863          */
1864         if ((vm_page_free_count < vm_page_free_reserved) &&
1865             !(current_thread()->options & TH_OPT_VMPRIV)) {
1866                 lck_mtx_unlock(&vm_page_queue_free_lock);
1867                 mem = VM_PAGE_NULL;
1868         }
1869         else {
1870                vm_page_t        head;
1871                vm_page_t        tail;
1872                unsigned int     pages_to_steal;
1873                unsigned int     color;
1874
1875                while ( vm_page_free_count == 0 ) {
1876
1877                         lck_mtx_unlock(&vm_page_queue_free_lock);
1878                         /*
1879                          * must be a privileged thread to be
1880                          * in this state since a non-privileged
1881                          * thread would have bailed if we were
1882                          * under the vm_page_free_reserved mark
1883                          */
1884                         VM_PAGE_WAIT();
1885                         lck_mtx_lock_spin(&vm_page_queue_free_lock);
1886                 }
1887
1888                 disable_preemption();
1889
1890                 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1891                         lck_mtx_unlock(&vm_page_queue_free_lock);
1892
1893                         /*
1894                          * we got preempted and moved to another processor
1895                          * or we got preempted and someone else ran and filled the cache
1896                          */
1897                         goto return_page_from_cpu_list;
1898                 }
1899                 if (vm_page_free_count <= vm_page_free_reserved)
1900                         pages_to_steal = 1;
1901                 else {
1902                         pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1903
1904                         if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1905                                 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1906                 }
1907                 color = PROCESSOR_DATA(current_processor(), start_color);
1908                 head = tail = NULL;
1909
1910                 while (pages_to_steal--) {
1911                         if (--vm_page_free_count < vm_page_free_count_minimum)
1912                                 vm_page_free_count_minimum = vm_page_free_count;
1913
1914                         while (queue_empty(&vm_page_queue_free[color]))
1915                                 color = (color + 1) & vm_color_mask;
1916
1917                         queue_remove_first(&vm_page_queue_free[color],
1918                                            mem,
1919                                            vm_page_t,
1920                                            pageq);
1921                         mem->pageq.next = NULL;
1922                         mem->pageq.prev = NULL;
1923
1924                         assert(!mem->active);
1925                         assert(!mem->inactive);
1926                         assert(!mem->throttled);
1927                         assert(!mem->speculative);
1928
1929                         color = (color + 1) & vm_color_mask;
1930
1931                         if (head == NULL)
1932                                 head = mem;
1933                         else
1934                                 tail->pageq.next = (queue_t)mem;
1935                         tail = mem;
1936
1937                         mem->pageq.prev = NULL;
1938                         assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1939                         assert(mem->tabled == FALSE);
1940                         assert(mem->object == VM_OBJECT_NULL);
1941                         assert(!mem->laundry);
1942                         assert(mem->free);
1943                         mem->free = FALSE;
1944
1945                         assert(pmap_verify_free(mem->phys_page));
1946                         assert(mem->busy);
1947                         assert(!mem->free);
1948                         assert(!mem->encrypted);
1949                         assert(!mem->pmapped);
1950                         assert(!mem->wpmapped);
1951                         assert(!pmap_is_noencrypt(mem->phys_page));
1952                 }
1953                 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1954                 PROCESSOR_DATA(current_processor(), start_color) = color;
1955
1956                 /*
1957                  * satisfy this request
1958                  */
1959                 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1960                 mem = head;
1961                 mem->pageq.next = NULL;
1962
1963                 lck_mtx_unlock(&vm_page_queue_free_lock);
1964
1965                 enable_preemption();
1966         }
1967         /*
1968          *      Decide if we should poke the pageout daemon.
1969          *      We do this if the free count is less than the low
1970          *      water mark, or if the free count is less than the high
1971          *      water mark (but above the low water mark) and the inactive
1972          *      count is less than its target.
1973          *
1974          *      We don't have the counts locked ... if they change a little,
1975          *      it doesn't really matter.
1976          */
1977         if ((vm_page_free_count < vm_page_free_min) ||
1978              ((vm_page_free_count < vm_page_free_target) &&
1979               ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1980                  thread_wakeup((event_t) &vm_page_free_wanted);
1981
1982         VM_CHECK_MEMORYSTATUS;
1983
1984 //      dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);      /* (TEST/DEBUG) */
1985
1986         return mem;
1987 }
1988
1989 /*
1990  *      vm_page_release:
1991  *
1992  *      Return a page to the free list.
1993  */
1994
1995 void
1996 vm_page_release(
1997         register vm_page_t      mem)
1998 {
1999         unsigned int    color;
2000         int     need_wakeup = 0;
2001         int     need_priv_wakeup = 0;
2002
2003
2004         assert(!mem->private && !mem->fictitious);
2005         if (vm_page_free_verify) {
2006                 assert(pmap_verify_free(mem->phys_page));
2007         }
2008 //      dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);      /* (TEST/DEBUG) */
2009
2010         pmap_clear_noencrypt(mem->phys_page);
2011
2012         lck_mtx_lock_spin(&vm_page_queue_free_lock);
2013 #if DEBUG
2014         if (mem->free)
2015                 panic("vm_page_release");
2016 #endif
2017
2018         assert(mem->busy);
2019         assert(!mem->laundry);
2020         assert(mem->object == VM_OBJECT_NULL);
2021         assert(mem->pageq.next == NULL &&
2022                mem->pageq.prev == NULL);
2023         assert(mem->listq.next == NULL &&
2024                mem->listq.prev == NULL);
2025
2026         if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2027             vm_lopage_free_count < vm_lopage_free_limit &&
2028             mem->phys_page < max_valid_low_ppnum) {
2029                 /*
2030                  * this exists to support hardware controllers
2031                  * incapable of generating DMAs with more than 32 bits
2032                  * of address on platforms with physical memory > 4G...
2033                  */
2034                 queue_enter_first(&vm_lopage_queue_free,
2035                                   mem,
2036                                   vm_page_t,
2037                                   pageq);
2038                 vm_lopage_free_count++;
2039
2040                 if (vm_lopage_free_count >= vm_lopage_free_limit)
2041                         vm_lopage_refill = FALSE;
2042
2043                 mem->lopage = TRUE;
2044         } else {
2045                 mem->lopage = FALSE;
2046                 mem->free = TRUE;
2047
2048                 color = mem->phys_page & vm_color_mask;
2049                 queue_enter_first(&vm_page_queue_free[color],
2050                                   mem,
2051                                   vm_page_t,
2052                                   pageq);
2053                 vm_page_free_count++;
2054                 /*
2055                  *      Check if we should wake up someone waiting for page.
2056                  *      But don't bother waking them unless they can allocate.
2057                  *
2058                  *      We wakeup only one thread, to prevent starvation.
2059                  *      Because the scheduling system handles wait queues FIFO,
2060                  *      if we wakeup all waiting threads, one greedy thread
2061                  *      can starve multiple niceguy threads.  When the threads
2062                  *      all wakeup, the greedy threads runs first, grabs the page,
2063                  *      and waits for another page.  It will be the first to run
2064                  *      when the next page is freed.
2065                  *
2066                  *      However, there is a slight danger here.
2067                  *      The thread we wake might not use the free page.
2068                  *      Then the other threads could wait indefinitely
2069                  *      while the page goes unused.  To forestall this,
2070                  *      the pageout daemon will keep making free pages
2071                  *      as long as vm_page_free_wanted is non-zero.
2072                  */
2073
2074                 assert(vm_page_free_count > 0);
2075                 if (vm_page_free_wanted_privileged > 0) {
2076                         vm_page_free_wanted_privileged--;
2077                         need_priv_wakeup = 1;
2078                 } else if (vm_page_free_wanted > 0 &&
2079                            vm_page_free_count > vm_page_free_reserved) {
2080                         vm_page_free_wanted--;
2081                         need_wakeup = 1;
2082                 }
2083         }
2084         lck_mtx_unlock(&vm_page_queue_free_lock);
2085
2086         if (need_priv_wakeup)
2087                 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2088         else if (need_wakeup)
2089                 thread_wakeup_one((event_t) &vm_page_free_count);
2090
2091         VM_CHECK_MEMORYSTATUS;
2092 }
2093
2094 /*
2095  *      vm_page_wait:
2096  *
2097  *      Wait for a page to become available.
2098  *      If there are plenty of free pages, then we don't sleep.
2099  *
2100  *      Returns:
2101  *              TRUE:  There may be another page, try again
2102  *              FALSE: We were interrupted out of our wait, don't try again
2103  */
2104
2105 boolean_t
2106 vm_page_wait(
2107         int     interruptible )
2108 {
2109         /*
2110          *      We can't use vm_page_free_reserved to make this
2111          *      determination.  Consider: some thread might
2112          *      need to allocate two pages.  The first allocation
2113          *      succeeds, the second fails.  After the first page is freed,
2114          *      a call to vm_page_wait must really block.
2115          */
2116         kern_return_t   wait_result;
2117         int             need_wakeup = 0;
2118         int             is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2119
2120         lck_mtx_lock_spin(&vm_page_queue_free_lock);
2121
2122         if (is_privileged && vm_page_free_count) {
2123                 lck_mtx_unlock(&vm_page_queue_free_lock);
2124                 return TRUE;
2125         }
2126         if (vm_page_free_count < vm_page_free_target) {
2127
2128                 if (is_privileged) {
2129                         if (vm_page_free_wanted_privileged++ == 0)
2130                                 need_wakeup = 1;
2131                         wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2132                 } else {
2133                         if (vm_page_free_wanted++ == 0)
2134                                 need_wakeup = 1;
2135                         wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2136                 }
2137                 lck_mtx_unlock(&vm_page_queue_free_lock);
2138                 counter(c_vm_page_wait_block++);
2139
2140                 if (need_wakeup)
2141                         thread_wakeup((event_t)&vm_page_free_wanted);
2142
2143                 if (wait_result == THREAD_WAITING)
2144                         wait_result = thread_block(THREAD_CONTINUE_NULL);
2145
2146                 return(wait_result == THREAD_AWAKENED);
2147         } else {
2148                 lck_mtx_unlock(&vm_page_queue_free_lock);
2149                 return TRUE;
2150         }
2151 }
2152
2153 /*
2154  *      vm_page_alloc:
2155  *
2156  *      Allocate and return a memory cell associated
2157  *      with this VM object/offset pair.
2158  *
2159  *      Object must be locked.
2160  */
2161
2162 vm_page_t
2163 vm_page_alloc(
2164         vm_object_t             object,
2165         vm_object_offset_t      offset)
2166 {
2167         register vm_page_t      mem;
2168
2169         vm_object_lock_assert_exclusive(object);
2170         mem = vm_page_grab();
2171         if (mem == VM_PAGE_NULL)
2172                 return VM_PAGE_NULL;
2173
2174         vm_page_insert(mem, object, offset);
2175
2176         return(mem);
2177 }
2178
2179 vm_page_t
2180 vm_page_alloclo(
2181         vm_object_t             object,
2182         vm_object_offset_t      offset)
2183 {
2184         register vm_page_t      mem;
2185
2186         vm_object_lock_assert_exclusive(object);
2187         mem = vm_page_grablo();
2188         if (mem == VM_PAGE_NULL)
2189                 return VM_PAGE_NULL;
2190
2191         vm_page_insert(mem, object, offset);
2192
2193         return(mem);
2194 }
2195
2196
2197 /*
2198  *      vm_page_alloc_guard:
2199  *
2200  *      Allocate a fictitious page which will be used
2201  *      as a guard page.  The page will be inserted into
2202  *      the object and returned to the caller.
2203  */
2204
2205 vm_page_t
2206 vm_page_alloc_guard(
2207         vm_object_t             object,
2208         vm_object_offset_t      offset)
2209 {
2210         register vm_page_t      mem;
2211
2212         vm_object_lock_assert_exclusive(object);
2213         mem = vm_page_grab_guard();
2214         if (mem == VM_PAGE_NULL)
2215                 return VM_PAGE_NULL;
2216
2217         vm_page_insert(mem, object, offset);
2218
2219         return(mem);
2220 }
2221
2222
2223 counter(unsigned int c_laundry_pages_freed = 0;)
2224
2225 /*
2226  *      vm_page_free_prepare:
2227  *
2228  *      Removes page from any queue it may be on
2229  *      and disassociates it from its VM object.
2230  *
2231  *      Object and page queues must be locked prior to entry.
2232  */
2233 static void
2234 vm_page_free_prepare(
2235         vm_page_t       mem)
2236 {
2237         vm_page_free_prepare_queues(mem);
2238         vm_page_free_prepare_object(mem, TRUE);
2239 }
2240
2241
2242 void
2243 vm_page_free_prepare_queues(
2244         vm_page_t       mem)
2245 {
2246         VM_PAGE_CHECK(mem);
2247         assert(!mem->free);
2248         assert(!mem->cleaning);
2249 #if DEBUG
2250         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2251         if (mem->free)
2252                 panic("vm_page_free: freeing page on free list\n");
2253 #endif
2254         if (mem->object) {
2255                 vm_object_lock_assert_exclusive(mem->object);
2256         }
2257         if (mem->laundry) {
2258                 /*
2259                  * We may have to free a page while it's being laundered
2260                  * if we lost its pager (due to a forced unmount, for example).
2261                  * We need to call vm_pageout_steal_laundry() before removing
2262                  * the page from its VM object, so that we can remove it
2263                  * from its pageout queue and adjust the laundry accounting
2264                  */
2265                 vm_pageout_steal_laundry(mem, TRUE);
2266                 counter(++c_laundry_pages_freed);
2267         }
2268
2269         VM_PAGE_QUEUES_REMOVE(mem);     /* clears local/active/inactive/throttled/speculative */
2270
2271         if (VM_PAGE_WIRED(mem)) {
2272                 if (mem->object) {
2273                         assert(mem->object->wired_page_count > 0);
2274                         mem->object->wired_page_count--;
2275                         assert(mem->object->resident_page_count >=
2276                                mem->object->wired_page_count);
2277
2278                         if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2279                                 OSAddAtomic(+1, &vm_page_purgeable_count);
2280                                 assert(vm_page_purgeable_wired_count > 0);
2281                                 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2282                         }
2283                 }
2284                 if (!mem->private && !mem->fictitious)
2285                         vm_page_wire_count--;
2286                 mem->wire_count = 0;
2287                 assert(!mem->gobbled);
2288         } else if (mem->gobbled) {
2289                 if (!mem->private && !mem->fictitious)
2290                         vm_page_wire_count--;
2291                 vm_page_gobble_count--;
2292         }
2293 }
2294
2295
2296 void
2297 vm_page_free_prepare_object(
2298         vm_page_t       mem,
2299         boolean_t       remove_from_hash)
2300 {
2301         if (mem->tabled)
2302                 vm_page_remove(mem, remove_from_hash);  /* clears tabled, object, offset */
2303
2304         PAGE_WAKEUP(mem);               /* clears wanted */
2305
2306         if (mem->private) {
2307                 mem->private = FALSE;
2308                 mem->fictitious = TRUE;
2309                 mem->phys_page = vm_page_fictitious_addr;
2310         }
2311         if ( !mem->fictitious) {
2312                 vm_page_init(mem, mem->phys_page, mem->lopage);
2313         }
2314 }
2315
2316
2317 /*
2318  *      vm_page_free:
2319  *
2320  *      Returns the given page to the free list,
2321  *      disassociating it with any VM object.
2322  *
2323  *      Object and page queues must be locked prior to entry.
2324  */
2325 void
2326 vm_page_free(
2327         vm_page_t       mem)
2328 {
2329         vm_page_free_prepare(mem);
2330
2331         if (mem->fictitious) {
2332                 vm_page_release_fictitious(mem);
2333         } else {
2334                 vm_page_release(mem);
2335         }
2336 }
2337
2338
2339 void
2340 vm_page_free_unlocked(
2341         vm_page_t       mem,
2342         boolean_t       remove_from_hash)
2343 {
2344         vm_page_lockspin_queues();
2345         vm_page_free_prepare_queues(mem);
2346         vm_page_unlock_queues();
2347
2348         vm_page_free_prepare_object(mem, remove_from_hash);
2349
2350         if (mem->fictitious) {
2351                 vm_page_release_fictitious(mem);
2352         } else {
2353                 vm_page_release(mem);
2354         }
2355 }
2356
2357
2358 /*
2359  * Free a list of pages.  The list can be up to several hundred pages,
2360  * as blocked up by vm_pageout_scan().
2361  * The big win is not having to take the free list lock once
2362  * per page.
2363  */
2364 void
2365 vm_page_free_list(
2366         vm_page_t       freeq,
2367         boolean_t       prepare_object)
2368 {
2369         vm_page_t       mem;
2370         vm_page_t       nxt;
2371         vm_page_t       local_freeq;
2372         int             pg_count;
2373
2374         while (freeq) {
2375
2376                 pg_count = 0;
2377                 local_freeq = VM_PAGE_NULL;
2378                 mem = freeq;
2379
2380                 /*
2381                  * break up the processing into smaller chunks so
2382                  * that we can 'pipeline' the pages onto the
2383                  * free list w/o introducing too much
2384                  * contention on the global free queue lock
2385                  */
2386                 while (mem && pg_count < 64) {
2387
2388                         assert(!mem->inactive);
2389                         assert(!mem->active);
2390                         assert(!mem->throttled);
2391                         assert(!mem->free);
2392                         assert(!mem->speculative);
2393                         assert(!VM_PAGE_WIRED(mem));
2394                         assert(mem->pageq.prev == NULL);
2395
2396                         nxt = (vm_page_t)(mem->pageq.next);
2397
2398                         if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2399                                 assert(pmap_verify_free(mem->phys_page));
2400                         }
2401                         if (prepare_object == TRUE)
2402                                 vm_page_free_prepare_object(mem, TRUE);
2403
2404                         if (!mem->fictitious) {
2405                                 assert(mem->busy);
2406
2407                                 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2408                                     vm_lopage_free_count < vm_lopage_free_limit &&
2409                                     mem->phys_page < max_valid_low_ppnum) {
2410                                         mem->pageq.next = NULL;
2411                                         vm_page_release(mem);
2412                                 } else {
2413                                         /*
2414                                          * IMPORTANT: we can't set the page "free" here
2415                                          * because that would make the page eligible for
2416                                          * a physically-contiguous allocation (see
2417                                          * vm_page_find_contiguous()) right away (we don't
2418                                          * hold the vm_page_queue_free lock).  That would
2419                                          * cause trouble because the page is not actually
2420                                          * in the free queue yet...
2421                                          */
2422                                         mem->pageq.next = (queue_entry_t)local_freeq;
2423                                         local_freeq = mem;
2424                                         pg_count++;
2425
2426                                         pmap_clear_noencrypt(mem->phys_page);
2427                                 }
2428                         } else {
2429                                 assert(mem->phys_page == vm_page_fictitious_addr ||
2430                                        mem->phys_page == vm_page_guard_addr);
2431                                 vm_page_release_fictitious(mem);
2432                         }
2433                         mem = nxt;
2434                 }
2435                 freeq = mem;
2436
2437                 if ( (mem = local_freeq) ) {
2438                         unsigned int    avail_free_count;
2439                         unsigned int    need_wakeup = 0;
2440                         unsigned int    need_priv_wakeup = 0;
2441
2442                         lck_mtx_lock_spin(&vm_page_queue_free_lock);
2443
2444                         while (mem) {
2445                                 int     color;
2446
2447                                 nxt = (vm_page_t)(mem->pageq.next);
2448
2449                                 assert(!mem->free);
2450                                 assert(mem->busy);
2451                                 mem->free = TRUE;
2452
2453                                 color = mem->phys_page & vm_color_mask;
2454                                 queue_enter_first(&vm_page_queue_free[color],
2455                                                   mem,
2456                                                   vm_page_t,
2457                                                   pageq);
2458                                 mem = nxt;
2459                         }
2460                         vm_page_free_count += pg_count;
2461                         avail_free_count = vm_page_free_count;
2462
2463                         if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2464
2465                                 if (avail_free_count < vm_page_free_wanted_privileged) {
2466                                         need_priv_wakeup = avail_free_count;
2467                                         vm_page_free_wanted_privileged -= avail_free_count;
2468                                         avail_free_count = 0;
2469                                 } else {
2470                                         need_priv_wakeup = vm_page_free_wanted_privileged;
2471                                         vm_page_free_wanted_privileged = 0;
2472                                         avail_free_count -= vm_page_free_wanted_privileged;
2473                                 }
2474                         }
2475                         if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2476                                 unsigned int  available_pages;
2477
2478                                 available_pages = avail_free_count - vm_page_free_reserved;
2479
2480                                 if (available_pages >= vm_page_free_wanted) {
2481                                         need_wakeup = vm_page_free_wanted;
2482                                         vm_page_free_wanted = 0;
2483                                 } else {
2484                                         need_wakeup = available_pages;
2485                                         vm_page_free_wanted -= available_pages;
2486                                 }
2487                         }
2488                         lck_mtx_unlock(&vm_page_queue_free_lock);
2489
2490                         if (need_priv_wakeup != 0) {
2491                                 /*
2492                                  * There shouldn't be that many VM-privileged threads,
2493                                  * so let's wake them all up, even if we don't quite
2494                                  * have enough pages to satisfy them all.
2495                                  */
2496                                 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2497                         }
2498                         if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2499                                 /*
2500                                  * We don't expect to have any more waiters
2501                                  * after this, so let's wake them all up at
2502                                  * once.
2503                                  */
2504                                 thread_wakeup((event_t) &vm_page_free_count);
2505                         } else for (; need_wakeup != 0; need_wakeup--) {
2506                                 /*
2507                                  * Wake up one waiter per page we just released.
2508                                  */
2509                                 thread_wakeup_one((event_t) &vm_page_free_count);
2510                         }
2511
2512                         VM_CHECK_MEMORYSTATUS;
2513                 }
2514         }
2515 }
2516
2517
2518 /*
2519  *      vm_page_wire:
2520  *
2521  *      Mark this page as wired down by yet
2522  *      another map, removing it from paging queues
2523  *      as necessary.
2524  *
2525  *      The page's object and the page queues must be locked.
2526  */
2527 void
2528 vm_page_wire(
2529         register vm_page_t      mem)
2530 {
2531
2532 //      dbgLog(current_thread(), mem->offset, mem->object, 1);  /* (TEST/DEBUG) */
2533
2534         VM_PAGE_CHECK(mem);
2535         if (mem->object) {
2536                 vm_object_lock_assert_exclusive(mem->object);
2537         } else {
2538                 /*
2539                  * In theory, the page should be in an object before it
2540                  * gets wired, since we need to hold the object lock
2541                  * to update some fields in the page structure.
2542                  * However, some code (i386 pmap, for example) might want
2543                  * to wire a page before it gets inserted into an object.
2544                  * That's somewhat OK, as long as nobody else can get to
2545                  * that page and update it at the same time.
2546                  */
2547         }
2548 #if DEBUG
2549         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2550 #endif
2551         if ( !VM_PAGE_WIRED(mem)) {
2552
2553                 if (mem->pageout_queue) {
2554                         mem->pageout = FALSE;
2555                         vm_pageout_throttle_up(mem);
2556                 }
2557                 VM_PAGE_QUEUES_REMOVE(mem);
2558
2559                 if (mem->object) {
2560                         mem->object->wired_page_count++;
2561                         assert(mem->object->resident_page_count >=
2562                                mem->object->wired_page_count);
2563                         if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2564                                 assert(vm_page_purgeable_count > 0);
2565                                 OSAddAtomic(-1, &vm_page_purgeable_count);
2566                                 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2567                         }
2568                         if (mem->object->all_reusable) {
2569                                 /*
2570                                  * Wired pages are not counted as "re-usable"
2571                                  * in "all_reusable" VM objects, so nothing
2572                                  * to do here.
2573                                  */
2574                         } else if (mem->reusable) {
2575                                 /*
2576                                  * This page is not "re-usable" when it's
2577                                  * wired, so adjust its state and the
2578                                  * accounting.
2579                                  */
2580                                 vm_object_reuse_pages(mem->object,
2581                                                       mem->offset,
2582                                                       mem->offset+PAGE_SIZE_64,
2583                                                       FALSE);
2584                         }
2585                 }
2586                 assert(!mem->reusable);
2587
2588                 if (!mem->private && !mem->fictitious && !mem->gobbled)
2589                         vm_page_wire_count++;
2590                 if (mem->gobbled)
2591                         vm_page_gobble_count--;
2592                 mem->gobbled = FALSE;
2593
2594                 VM_CHECK_MEMORYSTATUS;
2595
2596                 /*
2597                  * ENCRYPTED SWAP:
2598                  * The page could be encrypted, but
2599                  * We don't have to decrypt it here
2600                  * because we don't guarantee that the
2601                  * data is actually valid at this point.
2602                  * The page will get decrypted in
2603                  * vm_fault_wire() if needed.
2604                  */
2605         }
2606         assert(!mem->gobbled);
2607         mem->wire_count++;
2608         VM_PAGE_CHECK(mem);
2609 }
2610
2611 /*
2612  *      vm_page_gobble:
2613  *
2614  *      Mark this page as consumed by the vm/ipc/xmm subsystems.
2615  *
2616  *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2617  */
2618 void
2619 vm_page_gobble(
2620         register vm_page_t      mem)
2621 {
2622         vm_page_lockspin_queues();
2623         VM_PAGE_CHECK(mem);
2624
2625         assert(!mem->gobbled);
2626         assert( !VM_PAGE_WIRED(mem));
2627
2628         if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2629                 if (!mem->private && !mem->fictitious)
2630                         vm_page_wire_count++;
2631         }
2632         vm_page_gobble_count++;
2633         mem->gobbled = TRUE;
2634         vm_page_unlock_queues();
2635 }
2636
2637 /*
2638  *      vm_page_unwire:
2639  *
2640  *      Release one wiring of this page, potentially
2641  *      enabling it to be paged again.
2642  *
2643  *      The page's object and the page queues must be locked.
2644  */
2645 void
2646 vm_page_unwire(
2647         vm_page_t       mem,
2648         boolean_t       queueit)
2649 {
2650
2651 //      dbgLog(current_thread(), mem->offset, mem->object, 0);  /* (TEST/DEBUG) */
2652
2653         VM_PAGE_CHECK(mem);
2654         assert(VM_PAGE_WIRED(mem));
2655         assert(mem->object != VM_OBJECT_NULL);
2656 #if DEBUG
2657         vm_object_lock_assert_exclusive(mem->object);
2658         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2659 #endif
2660         if (--mem->wire_count == 0) {
2661                 assert(!mem->private && !mem->fictitious);
2662                 vm_page_wire_count--;
2663                 assert(mem->object->wired_page_count > 0);
2664                 mem->object->wired_page_count--;
2665                 assert(mem->object->resident_page_count >=
2666                        mem->object->wired_page_count);
2667                 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2668                         OSAddAtomic(+1, &vm_page_purgeable_count);
2669                         assert(vm_page_purgeable_wired_count > 0);
2670                         OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2671                 }
2672                 assert(!mem->laundry);
2673                 assert(mem->object != kernel_object);
2674                 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2675
2676                 if (queueit == TRUE) {
2677                         if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2678                                 vm_page_deactivate(mem);
2679                         } else {
2680                                 vm_page_activate(mem);
2681                         }
2682                 }
2683
2684                 VM_CHECK_MEMORYSTATUS;
2685
2686         }
2687         VM_PAGE_CHECK(mem);
2688 }
2689
2690 /*
2691  *      vm_page_deactivate:
2692  *
2693  *      Returns the given page to the inactive list,
2694  *      indicating that no physical maps have access
2695  *      to this page.  [Used by the physical mapping system.]
2696  *
2697  *      The page queues must be locked.
2698  */
2699 void
2700 vm_page_deactivate(
2701         vm_page_t       m)
2702 {
2703         vm_page_deactivate_internal(m, TRUE);
2704 }
2705
2706
2707 void
2708 vm_page_deactivate_internal(
2709         vm_page_t       m,
2710         boolean_t       clear_hw_reference)
2711 {
2712
2713         VM_PAGE_CHECK(m);
2714         assert(m->object != kernel_object);
2715         assert(m->phys_page != vm_page_guard_addr);
2716
2717 //      dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);        /* (TEST/DEBUG) */
2718 #if DEBUG
2719         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2720 #endif
2721         /*
2722          *      This page is no longer very interesting.  If it was
2723          *      interesting (active or inactive/referenced), then we
2724          *      clear the reference bit and (re)enter it in the
2725          *      inactive queue.  Note wired pages should not have
2726          *      their reference bit cleared.
2727          */
2728         assert ( !(m->absent && !m->unusual));
2729
2730         if (m->gobbled) {               /* can this happen? */
2731                 assert( !VM_PAGE_WIRED(m));
2732
2733                 if (!m->private && !m->fictitious)
2734                         vm_page_wire_count--;
2735                 vm_page_gobble_count--;
2736                 m->gobbled = FALSE;
2737         }
2738         /*
2739          * if this page is currently on the pageout queue, we can't do the
2740          * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2741          * and we can't remove it manually since we would need the object lock
2742          * (which is not required here) to decrement the activity_in_progress
2743          * reference which is held on the object while the page is in the pageout queue...
2744          * just let the normal laundry processing proceed
2745          */
2746         if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2747                 return;
2748
2749         if (!m->absent && clear_hw_reference == TRUE)
2750                 pmap_clear_reference(m->phys_page);
2751
2752         m->reference = FALSE;
2753         m->no_cache = FALSE;
2754
2755         if (!m->inactive) {
2756                 VM_PAGE_QUEUES_REMOVE(m);
2757
2758                 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2759                     m->dirty && m->object->internal &&
2760                     (m->object->purgable == VM_PURGABLE_DENY ||
2761                      m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2762                      m->object->purgable == VM_PURGABLE_VOLATILE)) {
2763                         queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2764                         m->throttled = TRUE;
2765                         vm_page_throttled_count++;
2766                 } else {
2767                         if (m->object->named && m->object->ref_count == 1) {
2768                                 vm_page_speculate(m, FALSE);
2769 #if DEVELOPMENT || DEBUG
2770                                 vm_page_speculative_recreated++;
2771 #endif
2772                         } else {
2773                                 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2774                         }
2775                 }
2776         }
2777 }
2778
2779 /*
2780  * vm_page_enqueue_cleaned
2781  *
2782  * Put the page on the cleaned queue, mark it cleaned, etc.
2783  * Being on the cleaned queue (and having m->clean_queue set)
2784  * does ** NOT ** guarantee that the page is clean!
2785  *
2786  * Call with the queues lock held.
2787  */
2788
2789 void vm_page_enqueue_cleaned(vm_page_t m)
2790 {
2791         assert(m->phys_page != vm_page_guard_addr);
2792 #if DEBUG
2793         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2794 #endif
2795         assert( !(m->absent && !m->unusual));
2796
2797         if (m->gobbled) {
2798                 assert( !VM_PAGE_WIRED(m));
2799                 if (!m->private && !m->fictitious)
2800                         vm_page_wire_count--;
2801                 vm_page_gobble_count--;
2802                 m->gobbled = FALSE;
2803         }
2804         /*
2805          * if this page is currently on the pageout queue, we can't do the
2806          * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2807          * and we can't remove it manually since we would need the object lock
2808          * (which is not required here) to decrement the activity_in_progress
2809          * reference which is held on the object while the page is in the pageout queue...
2810          * just let the normal laundry processing proceed
2811          */
2812         if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2813                 return;
2814
2815         VM_PAGE_QUEUES_REMOVE(m);
2816
2817         queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2818         m->clean_queue = TRUE;
2819         vm_page_cleaned_count++;
2820
2821         m->inactive = TRUE;
2822         vm_page_inactive_count++;
2823
2824         vm_pageout_enqueued_cleaned++;
2825 }
2826
2827 /*
2828  *      vm_page_activate:
2829  *
2830  *      Put the specified page on the active list (if appropriate).
2831  *
2832  *      The page queues must be locked.
2833  */
2834
2835 void
2836 vm_page_activate(
2837         register vm_page_t      m)
2838 {
2839         VM_PAGE_CHECK(m);
2840 #ifdef  FIXME_4778297
2841         assert(m->object != kernel_object);
2842 #endif
2843         assert(m->phys_page != vm_page_guard_addr);
2844 #if DEBUG
2845         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2846 #endif
2847         assert( !(m->absent && !m->unusual));
2848
2849         if (m->gobbled) {
2850                 assert( !VM_PAGE_WIRED(m));
2851                 if (!m->private && !m->fictitious)
2852                         vm_page_wire_count--;
2853                 vm_page_gobble_count--;
2854                 m->gobbled = FALSE;
2855         }
2856         /*
2857          * if this page is currently on the pageout queue, we can't do the
2858          * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2859          * and we can't remove it manually since we would need the object lock
2860          * (which is not required here) to decrement the activity_in_progress
2861          * reference which is held on the object while the page is in the pageout queue...
2862          * just let the normal laundry processing proceed
2863          */
2864         if (m->pageout_queue || m->private || m->fictitious)
2865                 return;
2866
2867 #if DEBUG
2868         if (m->active)
2869                 panic("vm_page_activate: already active");
2870 #endif
2871
2872         if (m->speculative) {
2873                 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2874                 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2875         }
2876
2877         VM_PAGE_QUEUES_REMOVE(m);
2878
2879         if ( !VM_PAGE_WIRED(m)) {
2880
2881                 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2882                     m->dirty && m->object->internal &&
2883                     (m->object->purgable == VM_PURGABLE_DENY ||
2884                      m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2885                      m->object->purgable == VM_PURGABLE_VOLATILE)) {
2886                         queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2887                         m->throttled = TRUE;
2888                         vm_page_throttled_count++;
2889                 } else {
2890                         queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2891                         m->active = TRUE;
2892                         vm_page_active_count++;
2893                 }
2894                 m->reference = TRUE;
2895                 m->no_cache = FALSE;
2896         }
2897         VM_PAGE_CHECK(m);
2898 }
2899
2900
2901 /*
2902  *      vm_page_speculate:
2903  *
2904  *      Put the specified page on the speculative list (if appropriate).
2905  *
2906  *      The page queues must be locked.
2907  */
2908 void
2909 vm_page_speculate(
2910         vm_page_t       m,
2911         boolean_t       new)
2912 {
2913         struct vm_speculative_age_q     *aq;
2914
2915         VM_PAGE_CHECK(m);
2916         assert(m->object != kernel_object);
2917         assert(m->phys_page != vm_page_guard_addr);
2918 #if DEBUG
2919         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2920 #endif
2921         assert( !(m->absent && !m->unusual));
2922
2923         /*
2924          * if this page is currently on the pageout queue, we can't do the
2925          * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926          * and we can't remove it manually since we would need the object lock
2927          * (which is not required here) to decrement the activity_in_progress
2928          * reference which is held on the object while the page is in the pageout queue...
2929          * just let the normal laundry processing proceed
2930          */
2931         if (m->pageout_queue || m->private || m->fictitious)
2932                 return;
2933
2934         VM_PAGE_QUEUES_REMOVE(m);
2935
2936         if ( !VM_PAGE_WIRED(m)) {
2937                 mach_timespec_t         ts;
2938                 clock_sec_t sec;
2939                 clock_nsec_t nsec;
2940
2941                 clock_get_system_nanotime(&sec, &nsec);
2942                 ts.tv_sec = (unsigned int) sec;
2943                 ts.tv_nsec = nsec;
2944
2945                 if (vm_page_speculative_count == 0) {
2946
2947                         speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2948                         speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2949
2950                         aq = &vm_page_queue_speculative[speculative_age_index];
2951
2952                         /*
2953                          * set the timer to begin a new group
2954                          */
2955                         aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2956                         aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2957
2958                         ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2959                 } else {
2960                         aq = &vm_page_queue_speculative[speculative_age_index];
2961
2962                         if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2963
2964                                 speculative_age_index++;
2965
2966                                 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2967                                         speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2968                                 if (speculative_age_index == speculative_steal_index) {
2969                                         speculative_steal_index = speculative_age_index + 1;
2970
2971                                         if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2972                                                 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2973                                 }
2974                                 aq = &vm_page_queue_speculative[speculative_age_index];
2975
2976                                 if (!queue_empty(&aq->age_q))
2977                                         vm_page_speculate_ageit(aq);
2978
2979                                 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2980                                 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2981
2982                                 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2983                         }
2984                 }
2985                 enqueue_tail(&aq->age_q, &m->pageq);
2986                 m->speculative = TRUE;
2987                 vm_page_speculative_count++;
2988
2989                 if (new == TRUE) {
2990                         vm_object_lock_assert_exclusive(m->object);
2991
2992                         m->object->pages_created++;
2993 #if DEVELOPMENT || DEBUG
2994                         vm_page_speculative_created++;
2995 #endif
2996                 }
2997         }
2998         VM_PAGE_CHECK(m);
2999 }
3000
3001
3002 /*
3003  * move pages from the specified aging bin to
3004  * the speculative bin that pageout_scan claims from
3005  *
3006  *      The page queues must be locked.
3007  */
3008 void
3009 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010 {
3011         struct vm_speculative_age_q     *sq;
3012         vm_page_t       t;
3013
3014         sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
3016         if (queue_empty(&sq->age_q)) {
3017                 sq->age_q.next = aq->age_q.next;
3018                 sq->age_q.prev = aq->age_q.prev;
3019
3020                 t = (vm_page_t)sq->age_q.next;
3021                 t->pageq.prev = &sq->age_q;
3022
3023                 t = (vm_page_t)sq->age_q.prev;
3024                 t->pageq.next = &sq->age_q;
3025         } else {
3026                 t = (vm_page_t)sq->age_q.prev;
3027                 t->pageq.next = aq->age_q.next;
3028
3029                 t = (vm_page_t)aq->age_q.next;
3030                 t->pageq.prev = sq->age_q.prev;
3031
3032                 t = (vm_page_t)aq->age_q.prev;
3033                 t->pageq.next = &sq->age_q;
3034
3035                 sq->age_q.prev = aq->age_q.prev;
3036         }
3037         queue_init(&aq->age_q);
3038 }
3039
3040
3041 void
3042 vm_page_lru(
3043         vm_page_t       m)
3044 {
3045         VM_PAGE_CHECK(m);
3046         assert(m->object != kernel_object);
3047         assert(m->phys_page != vm_page_guard_addr);
3048
3049 #if DEBUG
3050         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052         /*
3053          * if this page is currently on the pageout queue, we can't do the
3054          * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3055          * and we can't remove it manually since we would need the object lock
3056          * (which is not required here) to decrement the activity_in_progress
3057          * reference which is held on the object while the page is in the pageout queue...
3058          * just let the normal laundry processing proceed
3059          */
3060         if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
3061                 return;
3062
3063         m->no_cache = FALSE;
3064
3065         VM_PAGE_QUEUES_REMOVE(m);
3066
3067         VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3068 }
3069
3070
3071 void
3072 vm_page_reactivate_all_throttled(void)
3073 {
3074         vm_page_t       first_throttled, last_throttled;
3075         vm_page_t       first_active;
3076         vm_page_t       m;
3077         int             extra_active_count;
3078
3079         if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3080                 return;
3081
3082         extra_active_count = 0;
3083         vm_page_lock_queues();
3084         if (! queue_empty(&vm_page_queue_throttled)) {
3085                 /*
3086                  * Switch "throttled" pages to "active".
3087                  */
3088                 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3089                         VM_PAGE_CHECK(m);
3090                         assert(m->throttled);
3091                         assert(!m->active);
3092                         assert(!m->inactive);
3093                         assert(!m->speculative);
3094                         assert(!VM_PAGE_WIRED(m));
3095
3096                         extra_active_count++;
3097
3098                         m->throttled = FALSE;
3099                         m->active = TRUE;
3100                         VM_PAGE_CHECK(m);
3101                 }
3102
3103                 /*
3104                  * Transfer the entire throttled queue to a regular LRU page queues.
3105                  * We insert it at the head of the active queue, so that these pages
3106                  * get re-evaluated by the LRU algorithm first, since they've been
3107                  * completely out of it until now.
3108                  */
3109                 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3110                 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3111                 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3112                 if (queue_empty(&vm_page_queue_active)) {
3113                         queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3114                 } else {
3115                         queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3116                 }
3117                 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3118                 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3119                 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3120
3121 #if DEBUG
3122                 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3123 #endif
3124                 queue_init(&vm_page_queue_throttled);
3125                 /*
3126                  * Adjust the global page counts.
3127                  */
3128                 vm_page_active_count += extra_active_count;
3129                 vm_page_throttled_count = 0;
3130         }
3131         assert(vm_page_throttled_count == 0);
3132         assert(queue_empty(&vm_page_queue_throttled));
3133         vm_page_unlock_queues();
3134 }
3135
3136
3137 /*
3138  * move pages from the indicated local queue to the global active queue
3139  * its ok to fail if we're below the hard limit and force == FALSE
3140  * the nolocks == TRUE case is to allow this function to be run on
3141  * the hibernate path
3142  */
3143
3144 void
3145 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3146 {
3147         struct vpl      *lq;
3148         vm_page_t       first_local, last_local;
3149         vm_page_t       first_active;
3150         vm_page_t       m;
3151         uint32_t        count = 0;
3152
3153         if (vm_page_local_q == NULL)
3154                 return;
3155
3156         lq = &vm_page_local_q[lid].vpl_un.vpl;
3157
3158         if (nolocks == FALSE) {
3159                 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3160                         if ( !vm_page_trylockspin_queues())
3161                                 return;
3162                 } else
3163                         vm_page_lockspin_queues();
3164
3165                 VPL_LOCK(&lq->vpl_lock);
3166         }
3167         if (lq->vpl_count) {
3168                 /*
3169                  * Switch "local" pages to "active".
3170                  */
3171                 assert(!queue_empty(&lq->vpl_queue));
3172
3173                 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3174                         VM_PAGE_CHECK(m);
3175                         assert(m->local);
3176                         assert(!m->active);
3177                         assert(!m->inactive);
3178                         assert(!m->speculative);
3179                         assert(!VM_PAGE_WIRED(m));
3180                         assert(!m->throttled);
3181                         assert(!m->fictitious);
3182
3183                         if (m->local_id != lid)
3184                                 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3185
3186                         m->local_id = 0;
3187                         m->local = FALSE;
3188                         m->active = TRUE;
3189                         VM_PAGE_CHECK(m);
3190
3191                         count++;
3192                 }
3193                 if (count != lq->vpl_count)
3194                         panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3195
3196                 /*
3197                  * Transfer the entire local queue to a regular LRU page queues.
3198                  */
3199                 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3200                 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3201                 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3202
3203                 if (queue_empty(&vm_page_queue_active)) {
3204                         queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3205                 } else {
3206                         queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3207                 }
3208                 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3209                 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3210                 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3211
3212                 queue_init(&lq->vpl_queue);
3213                 /*
3214                  * Adjust the global page counts.
3215                  */
3216                 vm_page_active_count += lq->vpl_count;
3217                 lq->vpl_count = 0;
3218         }
3219         assert(queue_empty(&lq->vpl_queue));
3220
3221         if (nolocks == FALSE) {
3222                 VPL_UNLOCK(&lq->vpl_lock);
3223                 vm_page_unlock_queues();
3224         }
3225 }
3226
3227 /*
3228  *      vm_page_part_zero_fill:
3229  *
3230  *      Zero-fill a part of the page.
3231  */
3232 void
3233 vm_page_part_zero_fill(
3234         vm_page_t       m,
3235         vm_offset_t     m_pa,
3236         vm_size_t       len)
3237 {
3238         vm_page_t       tmp;
3239
3240 #if 0
3241         /*
3242          * we don't hold the page queue lock
3243          * so this check isn't safe to make
3244          */
3245         VM_PAGE_CHECK(m);
3246 #endif
3247
3248 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3249         pmap_zero_part_page(m->phys_page, m_pa, len);
3250 #else
3251         while (1) {
3252                 tmp = vm_page_grab();
3253                 if (tmp == VM_PAGE_NULL) {
3254                         vm_page_wait(THREAD_UNINT);
3255                         continue;
3256                 }
3257                 break;
3258         }
3259         vm_page_zero_fill(tmp);
3260         if(m_pa != 0) {
3261                 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3262         }
3263         if((m_pa + len) <  PAGE_SIZE) {
3264                 vm_page_part_copy(m, m_pa + len, tmp,
3265                                 m_pa + len, PAGE_SIZE - (m_pa + len));
3266         }
3267         vm_page_copy(tmp,m);
3268         VM_PAGE_FREE(tmp);
3269 #endif
3270
3271 }
3272
3273 /*
3274  *      vm_page_zero_fill:
3275  *
3276  *      Zero-fill the specified page.
3277  */
3278 void
3279 vm_page_zero_fill(
3280         vm_page_t       m)
3281 {
3282         XPR(XPR_VM_PAGE,
3283                 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3284                 m->object, m->offset, m, 0,0);
3285 #if 0
3286         /*
3287          * we don't hold the page queue lock
3288          * so this check isn't safe to make
3289          */
3290         VM_PAGE_CHECK(m);
3291 #endif
3292
3293 //      dbgTrace(0xAEAEAEAE, m->phys_page, 0);          /* (BRINGUP) */
3294         pmap_zero_page(m->phys_page);
3295 }
3296
3297 /*
3298  *      vm_page_part_copy:
3299  *
3300  *      copy part of one page to another
3301  */
3302
3303 void
3304 vm_page_part_copy(
3305         vm_page_t       src_m,
3306         vm_offset_t     src_pa,
3307         vm_page_t       dst_m,
3308         vm_offset_t     dst_pa,
3309         vm_size_t       len)
3310 {
3311 #if 0
3312         /*
3313          * we don't hold the page queue lock
3314          * so this check isn't safe to make
3315          */
3316         VM_PAGE_CHECK(src_m);
3317         VM_PAGE_CHECK(dst_m);
3318 #endif
3319         pmap_copy_part_page(src_m->phys_page, src_pa,
3320                         dst_m->phys_page, dst_pa, len);
3321 }
3322
3323 /*
3324  *      vm_page_copy:
3325  *
3326  *      Copy one page to another
3327  *
3328  * ENCRYPTED SWAP:
3329  * The source page should not be encrypted.  The caller should
3330  * make sure the page is decrypted first, if necessary.
3331  */
3332
3333 int vm_page_copy_cs_validations = 0;
3334 int vm_page_copy_cs_tainted = 0;
3335
3336 void
3337 vm_page_copy(
3338         vm_page_t       src_m,
3339         vm_page_t       dest_m)
3340 {
3341         XPR(XPR_VM_PAGE,
3342         "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3343         src_m->object, src_m->offset,
3344         dest_m->object, dest_m->offset,
3345         0);
3346 #if 0
3347         /*
3348          * we don't hold the page queue lock
3349          * so this check isn't safe to make
3350          */
3351         VM_PAGE_CHECK(src_m);
3352         VM_PAGE_CHECK(dest_m);
3353 #endif
3354         vm_object_lock_assert_held(src_m->object);
3355
3356         /*
3357          * ENCRYPTED SWAP:
3358          * The source page should not be encrypted at this point.
3359          * The destination page will therefore not contain encrypted
3360          * data after the copy.
3361          */
3362         if (src_m->encrypted) {
3363                 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3364         }
3365         dest_m->encrypted = FALSE;
3366
3367         if (src_m->object != VM_OBJECT_NULL &&
3368             src_m->object->code_signed) {
3369                 /*
3370                  * We're copying a page from a code-signed object.
3371                  * Whoever ends up mapping the copy page might care about
3372                  * the original page's integrity, so let's validate the
3373                  * source page now.
3374                  */
3375                 vm_page_copy_cs_validations++;
3376                 vm_page_validate_cs(src_m);
3377         }
3378
3379         if (vm_page_is_slideable(src_m)) {
3380                 boolean_t was_busy = src_m->busy;
3381                 src_m->busy = TRUE;
3382                 (void) vm_page_slide(src_m, 0);
3383                 assert(src_m->busy);
3384                 if (!was_busy) {
3385                         PAGE_WAKEUP_DONE(src_m);
3386                 }
3387         }
3388
3389         /*
3390          * Propagate the cs_tainted bit to the copy page. Do not propagate
3391          * the cs_validated bit.
3392          */
3393         dest_m->cs_tainted = src_m->cs_tainted;
3394         if (dest_m->cs_tainted) {
3395                 vm_page_copy_cs_tainted++;
3396         }
3397         dest_m->slid = src_m->slid;
3398         dest_m->error = src_m->error; /* sliding src_m might have failed... */
3399         pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3400 }
3401
3402 #if MACH_ASSERT
3403 static void
3404 _vm_page_print(
3405         vm_page_t       p)
3406 {
3407         printf("vm_page %p: \n", p);
3408         printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3409         printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3410         printf("  next=%p\n", p->next);
3411         printf("  object=%p offset=0x%llx\n", p->object, p->offset);
3412         printf("  wire_count=%u\n", p->wire_count);
3413
3414         printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3415                (p->local ? "" : "!"),
3416                (p->inactive ? "" : "!"),
3417                (p->active ? "" : "!"),
3418                (p->pageout_queue ? "" : "!"),
3419                (p->speculative ? "" : "!"),
3420                (p->laundry ? "" : "!"));
3421         printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3422                (p->free ? "" : "!"),
3423                (p->reference ? "" : "!"),
3424                (p->gobbled ? "" : "!"),
3425                (p->private ? "" : "!"),
3426                (p->throttled ? "" : "!"));
3427         printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3428                 (p->busy ? "" : "!"),
3429                 (p->wanted ? "" : "!"),
3430                 (p->tabled ? "" : "!"),
3431                 (p->fictitious ? "" : "!"),
3432                 (p->pmapped ? "" : "!"),
3433                 (p->wpmapped ? "" : "!"));
3434         printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3435                (p->pageout ? "" : "!"),
3436                (p->absent ? "" : "!"),
3437                (p->error ? "" : "!"),
3438                (p->dirty ? "" : "!"),
3439                (p->cleaning ? "" : "!"),
3440                (p->precious ? "" : "!"),
3441                (p->clustered ? "" : "!"));
3442         printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3443                (p->overwriting ? "" : "!"),
3444                (p->restart ? "" : "!"),
3445                (p->unusual ? "" : "!"),
3446                (p->encrypted ? "" : "!"),
3447                (p->encrypted_cleaning ? "" : "!"));
3448         printf("  %scs_validated, %scs_tainted, %sno_cache\n",
3449                (p->cs_validated ? "" : "!"),
3450                (p->cs_tainted ? "" : "!"),
3451                (p->no_cache ? "" : "!"));
3452
3453         printf("phys_page=0x%x\n", p->phys_page);
3454 }
3455
3456 /*
3457  *      Check that the list of pages is ordered by
3458  *      ascending physical address and has no holes.
3459  */
3460 static int
3461 vm_page_verify_contiguous(
3462         vm_page_t       pages,
3463         unsigned int    npages)
3464 {
3465         register vm_page_t      m;
3466         unsigned int            page_count;
3467         vm_offset_t             prev_addr;
3468
3469         prev_addr = pages->phys_page;
3470         page_count = 1;
3471         for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3472                 if (m->phys_page != prev_addr + 1) {
3473                         printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3474                                m, (long)prev_addr, m->phys_page);
3475                         printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3476                         panic("vm_page_verify_contiguous:  not contiguous!");
3477                 }
3478                 prev_addr = m->phys_page;
3479                 ++page_count;
3480         }
3481         if (page_count != npages) {
3482                 printf("pages %p actual count 0x%x but requested 0x%x\n",
3483                        pages, page_count, npages);
3484                 panic("vm_page_verify_contiguous:  count error");
3485         }
3486         return 1;
3487 }
3488
3489
3490 /*
3491  *      Check the free lists for proper length etc.
3492  */
3493 static unsigned int
3494 vm_page_verify_free_list(
3495         queue_head_t    *vm_page_queue,
3496         unsigned int    color,
3497         vm_page_t       look_for_page,
3498         boolean_t       expect_page)
3499 {
3500         unsigned int    npages;
3501         vm_page_t       m;
3502         vm_page_t       prev_m;
3503         boolean_t       found_page;
3504
3505         found_page = FALSE;
3506         npages = 0;
3507         prev_m = (vm_page_t) vm_page_queue;
3508         queue_iterate(vm_page_queue,
3509                       m,
3510                       vm_page_t,
3511                       pageq) {
3512
3513                 if (m == look_for_page) {
3514                         found_page = TRUE;
3515                 }
3516                 if ((vm_page_t) m->pageq.prev != prev_m)
3517                         panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3518                               color, npages, m, m->pageq.prev, prev_m);
3519                 if ( ! m->busy )
3520                         panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3521                               color, npages, m);
3522                 if (color != (unsigned int) -1) {
3523                         if ((m->phys_page & vm_color_mask) != color)
3524                                 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3525                                       color, npages, m, m->phys_page & vm_color_mask, color);
3526                         if ( ! m->free )
3527                                 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3528                                       color, npages, m);
3529                 }
3530                 ++npages;
3531                 prev_m = m;
3532         }
3533         if (look_for_page != VM_PAGE_NULL) {
3534                 unsigned int other_color;
3535
3536                 if (expect_page && !found_page) {
3537                         printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3538                                color, npages, look_for_page, look_for_page->phys_page);
3539                         _vm_page_print(look_for_page);
3540                         for (other_color = 0;
3541                              other_color < vm_colors;
3542                              other_color++) {
3543                                 if (other_color == color)
3544                                         continue;
3545                                 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3546                                                          other_color, look_for_page, FALSE);
3547                         }
3548                         if (color == (unsigned int) -1) {
3549                                 vm_page_verify_free_list(&vm_lopage_queue_free,
3550                                                          (unsigned int) -1, look_for_page, FALSE);
3551                         }
3552                         panic("vm_page_verify_free_list(color=%u)\n", color);
3553                 }
3554                 if (!expect_page && found_page) {
3555                         printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3556                                color, npages, look_for_page, look_for_page->phys_page);
3557                 }
3558         }
3559         return npages;
3560 }
3561
3562 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3563 static void
3564 vm_page_verify_free_lists( void )
3565 {
3566         unsigned int    color, npages, nlopages;
3567
3568         if (! vm_page_verify_free_lists_enabled)
3569                 return;
3570
3571         npages = 0;
3572
3573         lck_mtx_lock(&vm_page_queue_free_lock);
3574
3575         for( color = 0; color < vm_colors; color++ ) {
3576                 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3577                                                    color, VM_PAGE_NULL, FALSE);
3578         }
3579         nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3580                                             (unsigned int) -1,
3581                                             VM_PAGE_NULL, FALSE);
3582         if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3583                 panic("vm_page_verify_free_lists:  "
3584                       "npages %u free_count %d nlopages %u lo_free_count %u",
3585                       npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3586
3587         lck_mtx_unlock(&vm_page_queue_free_lock);
3588 }
3589
3590 void
3591 vm_page_queues_assert(
3592         vm_page_t       mem,
3593         int             val)
3594 {
3595 #if DEBUG
3596         lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3597 #endif
3598         if (mem->free + mem->active + mem->inactive + mem->speculative +
3599             mem->throttled + mem->pageout_queue > (val)) {
3600                 _vm_page_print(mem);
3601                 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3602         }
3603         if (VM_PAGE_WIRED(mem)) {
3604                 assert(!mem->active);
3605                 assert(!mem->inactive);
3606                 assert(!mem->speculative);
3607                 assert(!mem->throttled);
3608                 assert(!mem->pageout_queue);
3609         }
3610 }
3611 #endif  /* MACH_ASSERT */
3612
3613
3614 /*
3615  *      CONTIGUOUS PAGE ALLOCATION
3616  *
3617  *      Find a region large enough to contain at least n pages
3618  *      of contiguous physical memory.
3619  *
3620  *      This is done by traversing the vm_page_t array in a linear fashion
3621  *      we assume that the vm_page_t array has the avaiable physical pages in an
3622  *      ordered, ascending list... this is currently true of all our implementations
3623  *      and must remain so... there can be 'holes' in the array...  we also can
3624  *      no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3625  *      which use to happen via 'vm_page_convert'... that function was no longer
3626  *      being called and was removed...
3627  *
3628  *      The basic flow consists of stabilizing some of the interesting state of
3629  *      a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3630  *      sweep at the beginning of the array looking for pages that meet our criterea
3631  *      for a 'stealable' page... currently we are pretty conservative... if the page
3632  *      meets this criterea and is physically contiguous to the previous page in the 'run'
3633  *      we keep developing it.  If we hit a page that doesn't fit, we reset our state
3634  *      and start to develop a new run... if at this point we've already considered
3635  *      at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3636  *      and mutex_pause (which will yield the processor), to keep the latency low w/r
3637  *      to other threads trying to acquire free pages (or move pages from q to q),
3638  *      and then continue from the spot we left off... we only make 1 pass through the
3639  *      array.  Once we have a 'run' that is long enough, we'll go into the loop which
3640  *      which steals the pages from the queues they're currently on... pages on the free
3641  *      queue can be stolen directly... pages that are on any of the other queues
3642  *      must be removed from the object they are tabled on... this requires taking the
3643  *      object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3644  *      or if the state of the page behind the vm_object lock is no longer viable, we'll
3645  *      dump the pages we've currently stolen back to the free list, and pick up our
3646  *      scan from the point where we aborted the 'current' run.
3647  *
3648  *
3649  *      Requirements:
3650  *              - neither vm_page_queue nor vm_free_list lock can be held on entry
3651  *
3652  *      Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3653  *
3654  * Algorithm:
3655  */
3656
3657 #define MAX_CONSIDERED_BEFORE_YIELD     1000
3658
3659
3660 #define RESET_STATE_OF_RUN()    \
3661         MACRO_BEGIN             \
3662         prevcontaddr = -2;      \
3663         start_pnum = -1;        \
3664         free_considered = 0;    \
3665         substitute_needed = 0;  \
3666         npages = 0;             \
3667         MACRO_END
3668
3669 /*
3670  * Can we steal in-use (i.e. not free) pages when searching for
3671  * physically-contiguous pages ?
3672  */
3673 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3674
3675 static unsigned int vm_page_find_contiguous_last_idx = 0,  vm_page_lomem_find_contiguous_last_idx = 0;
3676 #if DEBUG
3677 int vm_page_find_contig_debug = 0;
3678 #endif
3679
3680 static vm_page_t
3681 vm_page_find_contiguous(
3682         unsigned int    contig_pages,
3683         ppnum_t         max_pnum,
3684         ppnum_t     pnum_mask,
3685         boolean_t       wire,
3686         int             flags)
3687 {
3688         vm_page_t       m = NULL;
3689         ppnum_t         prevcontaddr;
3690         ppnum_t         start_pnum;
3691         unsigned int    npages, considered, scanned;
3692         unsigned int    page_idx, start_idx, last_idx, orig_last_idx;
3693         unsigned int    idx_last_contig_page_found = 0;
3694         int             free_considered, free_available;
3695         int             substitute_needed;
3696         boolean_t       wrapped;
3697 #if DEBUG
3698         clock_sec_t     tv_start_sec, tv_end_sec;
3699         clock_usec_t    tv_start_usec, tv_end_usec;
3700 #endif
3701 #if MACH_ASSERT
3702         int             yielded = 0;
3703         int             dumped_run = 0;
3704         int             stolen_pages = 0;
3705 #endif
3706
3707         if (contig_pages == 0)
3708                 return VM_PAGE_NULL;
3709
3710 #if MACH_ASSERT
3711         vm_page_verify_free_lists();
3712 #endif
3713 #if DEBUG
3714         clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3715 #endif
3716         vm_page_lock_queues();
3717         lck_mtx_lock(&vm_page_queue_free_lock);
3718
3719         RESET_STATE_OF_RUN();
3720
3721         scanned = 0;
3722         considered = 0;
3723         free_available = vm_page_free_count - vm_page_free_reserved;
3724
3725         wrapped = FALSE;
3726
3727         if(flags & KMA_LOMEM)
3728                 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3729         else
3730                 idx_last_contig_page_found =  vm_page_find_contiguous_last_idx;
3731
3732         orig_last_idx = idx_last_contig_page_found;
3733         last_idx = orig_last_idx;
3734
3735         for (page_idx = last_idx, start_idx = last_idx;
3736              npages < contig_pages && page_idx < vm_pages_count;
3737              page_idx++) {
3738 retry:
3739                 if (wrapped &&
3740                     npages == 0 &&
3741                     page_idx >= orig_last_idx) {
3742                         /*
3743                          * We're back where we started and we haven't
3744                          * found any suitable contiguous range.  Let's
3745                          * give up.
3746                          */
3747                         break;
3748                 }
3749                 scanned++;
3750                 m = &vm_pages[page_idx];
3751
3752                 assert(!m->fictitious);
3753                 assert(!m->private);
3754
3755                 if (max_pnum && m->phys_page > max_pnum) {
3756                         /* no more low pages... */
3757                         break;
3758                 }
3759                 if (!npages & ((m->phys_page & pnum_mask) != 0)) {
3760                         /*
3761                          * not aligned
3762                          */
3763                         RESET_STATE_OF_RUN();
3764
3765                 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3766                            m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3767                            m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3768                            m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
3769                         /*
3770                          * page is in a transient state
3771                          * or a state we don't want to deal
3772                          * with, so don't consider it which
3773                          * means starting a new run
3774                          */
3775                         RESET_STATE_OF_RUN();
3776
3777                 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3778                         /*
3779                          * page needs to be on one of our queues
3780                          * in order for it to be stable behind the
3781                          * locks we hold at this point...
3782                          * if not, don't consider it which
3783                          * means starting a new run
3784                          */
3785                         RESET_STATE_OF_RUN();
3786
3787                 } else if (!m->free && (!m->tabled || m->busy)) {
3788                         /*
3789                          * pages on the free list are always 'busy'
3790                          * so we couldn't test for 'busy' in the check
3791                          * for the transient states... pages that are
3792                          * 'free' are never 'tabled', so we also couldn't
3793                          * test for 'tabled'.  So we check here to make
3794                          * sure that a non-free page is not busy and is
3795                          * tabled on an object...
3796                          * if not, don't consider it which
3797                          * means starting a new run
3798                          */
3799                         RESET_STATE_OF_RUN();
3800
3801                 } else {
3802                         if (m->phys_page != prevcontaddr + 1) {
3803                                 if ((m->phys_page & pnum_mask) != 0) {
3804                                         RESET_STATE_OF_RUN();
3805                                         goto did_consider;
3806                                 } else {
3807                                         npages = 1;
3808                                         start_idx = page_idx;
3809                                         start_pnum = m->phys_page;
3810                                 }
3811                         } else {
3812                                 npages++;
3813                         }
3814                         prevcontaddr = m->phys_page;
3815
3816                         VM_PAGE_CHECK(m);
3817                         if (m->free) {
3818                                 free_considered++;
3819                         } else {
3820                                 /*
3821                                  * This page is not free.
3822                                  * If we can't steal used pages,
3823                                  * we have to give up this run
3824                                  * and keep looking.
3825                                  * Otherwise, we might need to
3826                                  * move the contents of this page
3827                                  * into a substitute page.
3828                                  */
3829 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3830                                 if (m->pmapped || m->dirty) {
3831                                         substitute_needed++;
3832                                 }
3833 #else
3834                                 RESET_STATE_OF_RUN();
3835 #endif
3836                         }
3837
3838                         if ((free_considered + substitute_needed) > free_available) {
3839                                 /*
3840                                  * if we let this run continue
3841                                  * we will end up dropping the vm_page_free_count
3842                                  * below the reserve limit... we need to abort
3843                                  * this run, but we can at least re-consider this
3844                                  * page... thus the jump back to 'retry'
3845                                  */
3846                                 RESET_STATE_OF_RUN();
3847
3848                                 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3849                                         considered++;
3850                                         goto retry;
3851                                 }
3852                                 /*
3853                                  * free_available == 0
3854                                  * so can't consider any free pages... if
3855                                  * we went to retry in this case, we'd
3856                                  * get stuck looking at the same page
3857                                  * w/o making any forward progress
3858                                  * we also want to take this path if we've already
3859                                  * reached our limit that controls the lock latency
3860                                  */
3861                         }
3862                 }
3863 did_consider:
3864                 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3865
3866                         lck_mtx_unlock(&vm_page_queue_free_lock);
3867                         vm_page_unlock_queues();
3868
3869                         mutex_pause(0);
3870
3871                         vm_page_lock_queues();
3872                         lck_mtx_lock(&vm_page_queue_free_lock);
3873
3874                         RESET_STATE_OF_RUN();
3875                         /*
3876                          * reset our free page limit since we
3877                          * dropped the lock protecting the vm_page_free_queue
3878                          */
3879                         free_available = vm_page_free_count - vm_page_free_reserved;
3880                         considered = 0;
3881 #if MACH_ASSERT
3882                         yielded++;
3883 #endif
3884                         goto retry;
3885                 }
3886                 considered++;
3887         }
3888         m = VM_PAGE_NULL;
3889
3890         if (npages != contig_pages) {
3891                 if (!wrapped) {
3892                         /*
3893                          * We didn't find a contiguous range but we didn't
3894                          * start from the very first page.
3895                          * Start again from the very first page.
3896                          */
3897                         RESET_STATE_OF_RUN();
3898                         if( flags & KMA_LOMEM)
3899                                 idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = 0;
3900                         else
3901                                 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3902                         last_idx = 0;
3903                         page_idx = last_idx;
3904                         wrapped = TRUE;
3905                         goto retry;
3906                 }
3907                 lck_mtx_unlock(&vm_page_queue_free_lock);
3908         } else {
3909                 vm_page_t       m1;
3910                 vm_page_t       m2;
3911                 unsigned int    cur_idx;
3912                 unsigned int    tmp_start_idx;
3913                 vm_object_t     locked_object = VM_OBJECT_NULL;
3914                 boolean_t       abort_run = FALSE;
3915
3916                 assert(page_idx - start_idx == contig_pages);
3917
3918                 tmp_start_idx = start_idx;
3919
3920                 /*
3921                  * first pass through to pull the free pages
3922                  * off of the free queue so that in case we
3923                  * need substitute pages, we won't grab any
3924                  * of the free pages in the run... we'll clear
3925                  * the 'free' bit in the 2nd pass, and even in
3926                  * an abort_run case, we'll collect all of the
3927                  * free pages in this run and return them to the free list
3928                  */
3929                 while (start_idx < page_idx) {
3930
3931                         m1 = &vm_pages[start_idx++];
3932
3933 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3934                         assert(m1->free);
3935 #endif
3936
3937                         if (m1->free) {
3938                                 unsigned int color;
3939
3940                                 color = m1->phys_page & vm_color_mask;
3941 #if MACH_ASSERT
3942                                 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3943 #endif
3944                                 queue_remove(&vm_page_queue_free[color],
3945                                              m1,
3946                                              vm_page_t,
3947                                              pageq);
3948                                 m1->pageq.next = NULL;
3949                                 m1->pageq.prev = NULL;
3950 #if MACH_ASSERT
3951                                 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3952 #endif
3953                                 /*
3954                                  * Clear the "free" bit so that this page
3955                                  * does not get considered for another
3956                                  * concurrent physically-contiguous allocation.
3957                                  */
3958                                 m1->free = FALSE;
3959                                 assert(m1->busy);
3960
3961                                 vm_page_free_count--;
3962                         }
3963                 }
3964                 /*
3965                  * adjust global freelist counts
3966                  */
3967                 if (vm_page_free_count < vm_page_free_count_minimum)
3968                         vm_page_free_count_minimum = vm_page_free_count;
3969
3970                 if( flags & KMA_LOMEM)
3971                         vm_page_lomem_find_contiguous_last_idx = page_idx;
3972                 else
3973                         vm_page_find_contiguous_last_idx = page_idx;
3974
3975                 /*
3976                  * we can drop the free queue lock at this point since
3977                  * we've pulled any 'free' candidates off of the list
3978                  * we need it dropped so that we can do a vm_page_grab
3979                  * when substituing for pmapped/dirty pages
3980                  */
3981                 lck_mtx_unlock(&vm_page_queue_free_lock);
3982
3983                 start_idx = tmp_start_idx;
3984                 cur_idx = page_idx - 1;
3985
3986                 while (start_idx++ < page_idx) {
3987                         /*
3988                          * must go through the list from back to front
3989                          * so that the page list is created in the
3990                          * correct order - low -> high phys addresses
3991                          */
3992                         m1 = &vm_pages[cur_idx--];
3993
3994                         assert(!m1->free);
3995                         if (m1->object == VM_OBJECT_NULL) {
3996                                 /*
3997                                  * page has already been removed from
3998                                  * the free list in the 1st pass
3999                                  */
4000                                 assert(m1->offset == (vm_object_offset_t) -1);
4001                                 assert(m1->busy);
4002                                 assert(!m1->wanted);
4003                                 assert(!m1->laundry);
4004                         } else {
4005                                 vm_object_t object;
4006
4007                                 if (abort_run == TRUE)
4008                                         continue;
4009
4010                                 object = m1->object;
4011
4012                                 if (object != locked_object) {
4013                                         if (locked_object) {
4014                                                 vm_object_unlock(locked_object);
4015                                                 locked_object = VM_OBJECT_NULL;
4016                                         }
4017                                         if (vm_object_lock_try(object))
4018                                                 locked_object = object;
4019                                 }
4020                                 if (locked_object == VM_OBJECT_NULL ||
4021                                     (VM_PAGE_WIRED(m1) || m1->gobbled ||
4022                                      m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
4023                                      m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
4024                                      m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
4025
4026                                         if (locked_object) {
4027                                                 vm_object_unlock(locked_object);
4028                                                 locked_object = VM_OBJECT_NULL;
4029                                         }
4030                                         tmp_start_idx = cur_idx;
4031                                         abort_run = TRUE;
4032                                         continue;
4033                                 }
4034                                 if (m1->pmapped || m1->dirty) {
4035                                         int refmod;
4036                                         vm_object_offset_t offset;
4037
4038                                         m2 = vm_page_grab();
4039
4040                                         if (m2 == VM_PAGE_NULL) {
4041                                                 if (locked_object) {
4042                                                         vm_object_unlock(locked_object);
4043                                                         locked_object = VM_OBJECT_NULL;
4044                                                 }
4045                                                 tmp_start_idx = cur_idx;
4046                                                 abort_run = TRUE;
4047                                                 continue;
4048                                         }
4049                                         if (m1->pmapped)
4050                                                 refmod = pmap_disconnect(m1->phys_page);
4051                                         else
4052                                                 refmod = 0;
4053                                         vm_page_copy(m1, m2);
4054
4055                                         m2->reference = m1->reference;
4056                                         m2->dirty     = m1->dirty;
4057
4058                                         if (refmod & VM_MEM_REFERENCED)
4059                                                 m2->reference = TRUE;
4060                                         if (refmod & VM_MEM_MODIFIED) {
4061                                                 SET_PAGE_DIRTY(m2, TRUE);
4062                                         }
4063                                         offset = m1->offset;
4064
4065                                         /*
4066                                          * completely cleans up the state
4067                                          * of the page so that it is ready
4068                                          * to be put onto the free list, or
4069                                          * for this purpose it looks like it
4070                                          * just came off of the free list
4071                                          */
4072                                         vm_page_free_prepare(m1);
4073
4074                                         /*
4075                                          * make sure we clear the ref/mod state
4076                                          * from the pmap layer... else we risk
4077                                          * inheriting state from the last time
4078                                          * this page was used...
4079                                          */
4080                                         pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4081                                         /*
4082                                          * now put the substitute page on the object
4083                                          */
4084                                         vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4085
4086                                         if (m2->reference)
4087                                                 vm_page_activate(m2);
4088                                         else
4089                                                 vm_page_deactivate(m2);
4090
4091                                         PAGE_WAKEUP_DONE(m2);
4092
4093                                 } else {
4094                                         /*
4095                                          * completely cleans up the state
4096                                          * of the page so that it is ready
4097                                          * to be put onto the free list, or
4098                                          * for this purpose it looks like it
4099                                          * just came off of the free list
4100                                          */
4101                                         vm_page_free_prepare(m1);
4102                                 }
4103 #if MACH_ASSERT
4104                                 stolen_pages++;
4105 #endif
4106                         }
4107                         m1->pageq.next = (queue_entry_t) m;
4108                         m1->pageq.prev = NULL;
4109                         m = m1;
4110                 }
4111                 if (locked_object) {
4112                         vm_object_unlock(locked_object);
4113                         locked_object = VM_OBJECT_NULL;
4114                 }
4115
4116                 if (abort_run == TRUE) {
4117                         if (m != VM_PAGE_NULL) {
4118                                 vm_page_free_list(m, FALSE);
4119                         }
4120 #if MACH_ASSERT
4121                         dumped_run++;
4122 #endif
4123                         /*
4124                          * want the index of the last
4125                          * page in this run that was
4126                          * successfully 'stolen', so back
4127                          * it up 1 for the auto-decrement on use
4128                          * and 1 more to bump back over this page
4129                          */
4130                         page_idx = tmp_start_idx + 2;
4131                         if (page_idx >= vm_pages_count) {
4132                                 if (wrapped)
4133                                         goto done_scanning;
4134                                 page_idx = last_idx = 0;
4135                                 wrapped = TRUE;
4136                         }
4137                         abort_run = FALSE;
4138
4139                         /*
4140                          * We didn't find a contiguous range but we didn't
4141                          * start from the very first page.
4142                          * Start again from the very first page.
4143                          */
4144                         RESET_STATE_OF_RUN();
4145
4146                         if( flags & KMA_LOMEM)
4147                                 idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = page_idx;
4148                         else
4149                                 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4150
4151                         last_idx = page_idx;
4152
4153                         lck_mtx_lock(&vm_page_queue_free_lock);
4154                         /*
4155                         * reset our free page limit since we
4156                         * dropped the lock protecting the vm_page_free_queue
4157                         */
4158                         free_available = vm_page_free_count - vm_page_free_reserved;
4159                         goto retry;
4160                 }
4161
4162                 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4163
4164                         if (wire == TRUE)
4165                                 m1->wire_count++;
4166                         else
4167                                 m1->gobbled = TRUE;
4168                 }
4169                 if (wire == FALSE)
4170                         vm_page_gobble_count += npages;
4171
4172                 /*
4173                  * gobbled pages are also counted as wired pages
4174                  */
4175                 vm_page_wire_count += npages;
4176
4177                 assert(vm_page_verify_contiguous(m, npages));
4178         }
4179 done_scanning:
4180         vm_page_unlock_queues();
4181
4182 #if DEBUG
4183         clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4184
4185         tv_end_sec -= tv_start_sec;
4186         if (tv_end_usec < tv_start_usec) {
4187                 tv_end_sec--;
4188                 tv_end_usec += 1000000;
4189         }
4190         tv_end_usec -= tv_start_usec;
4191         if (tv_end_usec >= 1000000) {
4192                 tv_end_sec++;
4193                 tv_end_sec -= 1000000;
4194         }
4195         if (vm_page_find_contig_debug) {
4196                 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds...  started at %d... scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
4197                __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4198                (long)tv_end_sec, tv_end_usec, orig_last_idx,
4199                scanned, yielded, dumped_run, stolen_pages);
4200         }
4201
4202 #endif
4203 #if MACH_ASSERT
4204         vm_page_verify_free_lists();
4205 #endif
4206         return m;
4207 }
4208
4209 /*
4210  *      Allocate a list of contiguous, wired pages.
4211  */
4212 kern_return_t
4213 cpm_allocate(
4214         vm_size_t       size,
4215         vm_page_t       *list,
4216         ppnum_t         max_pnum,
4217         ppnum_t         pnum_mask,
4218         boolean_t       wire,
4219         int             flags)
4220 {
4221         vm_page_t               pages;
4222         unsigned int            npages;
4223
4224         if (size % PAGE_SIZE != 0)
4225                 return KERN_INVALID_ARGUMENT;
4226
4227         npages = (unsigned int) (size / PAGE_SIZE);
4228         if (npages != size / PAGE_SIZE) {
4229                 /* 32-bit overflow */
4230                 return KERN_INVALID_ARGUMENT;
4231         }
4232
4233         /*
4234          *      Obtain a pointer to a subset of the free
4235          *      list large enough to satisfy the request;
4236          *      the region will be physically contiguous.
4237          */
4238         pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4239
4240         if (pages == VM_PAGE_NULL)
4241                 return KERN_NO_SPACE;
4242         /*
4243          * determine need for wakeups
4244          */
4245         if ((vm_page_free_count < vm_page_free_min) ||
4246              ((vm_page_free_count < vm_page_free_target) &&
4247               ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4248                  thread_wakeup((event_t) &vm_page_free_wanted);
4249
4250         VM_CHECK_MEMORYSTATUS;
4251
4252         /*
4253          *      The CPM pages should now be available and
4254          *      ordered by ascending physical address.
4255          */
4256         assert(vm_page_verify_contiguous(pages, npages));
4257
4258         *list = pages;
4259         return KERN_SUCCESS;
4260 }
4261
4262
4263 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4264
4265 /*
4266  * when working on a 'run' of pages, it is necessary to hold
4267  * the vm_page_queue_lock (a hot global lock) for certain operations
4268  * on the page... however, the majority of the work can be done
4269  * while merely holding the object lock... in fact there are certain
4270  * collections of pages that don't require any work brokered by the
4271  * vm_page_queue_lock... to mitigate the time spent behind the global
4272  * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4273  * while doing all of the work that doesn't require the vm_page_queue_lock...
4274  * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4275  * necessary work for each page... we will grab the busy bit on the page
4276  * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4277  * if it can't immediately take the vm_page_queue_lock in order to compete
4278  * for the locks in the same order that vm_pageout_scan takes them.
4279  * the operation names are modeled after the names of the routines that
4280  * need to be called in order to make the changes very obvious in the
4281  * original loop
4282  */
4283
4284 void
4285 vm_page_do_delayed_work(
4286         vm_object_t     object,
4287         struct vm_page_delayed_work *dwp,
4288         int             dw_count)
4289 {
4290         int             j;
4291         vm_page_t       m;
4292         vm_page_t       local_free_q = VM_PAGE_NULL;
4293
4294         /*
4295          * pageout_scan takes the vm_page_lock_queues first
4296          * then tries for the object lock... to avoid what
4297          * is effectively a lock inversion, we'll go to the
4298          * trouble of taking them in that same order... otherwise
4299          * if this object contains the majority of the pages resident
4300          * in the UBC (or a small set of large objects actively being
4301          * worked on contain the majority of the pages), we could
4302          * cause the pageout_scan thread to 'starve' in its attempt
4303          * to find pages to move to the free queue, since it has to
4304          * successfully acquire the object lock of any candidate page
4305          * before it can steal/clean it.
4306          */
4307         if (!vm_page_trylockspin_queues()) {
4308                 vm_object_unlock(object);
4309
4310                 vm_page_lockspin_queues();
4311
4312                 for (j = 0; ; j++) {
4313                         if (!vm_object_lock_avoid(object) &&
4314                             _vm_object_lock_try(object))
4315                                 break;
4316                         vm_page_unlock_queues();
4317                         mutex_pause(j);
4318                         vm_page_lockspin_queues();
4319                 }
4320         }
4321         for (j = 0; j < dw_count; j++, dwp++) {
4322
4323                 m = dwp->dw_m;
4324
4325                 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4326                         vm_pageout_throttle_up(m);
4327
4328                 if (dwp->dw_mask & DW_vm_page_wire)
4329                         vm_page_wire(m);
4330                 else if (dwp->dw_mask & DW_vm_page_unwire) {
4331                         boolean_t       queueit;
4332
4333                         queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4334
4335                         vm_page_unwire(m, queueit);
4336                 }
4337                 if (dwp->dw_mask & DW_vm_page_free) {
4338                         vm_page_free_prepare_queues(m);
4339
4340                         assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4341                         /*
4342                          * Add this page to our list of reclaimed pages,
4343                          * to be freed later.
4344                          */
4345                         m->pageq.next = (queue_entry_t) local_free_q;
4346                         local_free_q = m;
4347                 } else {
4348                         if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4349                                 vm_page_deactivate_internal(m, FALSE);
4350                         else if (dwp->dw_mask & DW_vm_page_activate) {
4351                                 if (m->active == FALSE) {
4352                                         vm_page_activate(m);
4353                                 }
4354                         }
4355                         else if (dwp->dw_mask & DW_vm_page_speculate)
4356                                 vm_page_speculate(m, TRUE);
4357                         else if (dwp->dw_mask & DW_enqueue_cleaned) {
4358                                 /*
4359                                  * if we didn't hold the object lock and did this,
4360                                  * we might disconnect the page, then someone might
4361                                  * soft fault it back in, then we would put it on the
4362                                  * cleaned queue, and so we would have a referenced (maybe even dirty)
4363                                  * page on that queue, which we don't want
4364                                  */
4365                                 int refmod_state = pmap_disconnect(m->phys_page);
4366
4367                                 if ((refmod_state & VM_MEM_REFERENCED)) {
4368                                         /*
4369                                          * this page has been touched since it got cleaned; let's activate it
4370                                          * if it hasn't already been
4371                                          */
4372                                         vm_pageout_enqueued_cleaned++;
4373                                         vm_pageout_cleaned_reactivated++;
4374                                         vm_pageout_cleaned_commit_reactivated++;
4375
4376                                         if (m->active == FALSE)
4377                                                 vm_page_activate(m);
4378                                 } else {
4379                                         m->reference = FALSE;
4380                                         vm_page_enqueue_cleaned(m);
4381                                 }
4382                         }
4383                         else if (dwp->dw_mask & DW_vm_page_lru)
4384                                 vm_page_lru(m);
4385                         else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4386                                 if ( !m->pageout_queue)
4387                                         VM_PAGE_QUEUES_REMOVE(m);
4388                         }
4389                         if (dwp->dw_mask & DW_set_reference)
4390                                 m->reference = TRUE;
4391                         else if (dwp->dw_mask & DW_clear_reference)
4392                                 m->reference = FALSE;
4393
4394                         if (dwp->dw_mask & DW_move_page) {
4395                                 if ( !m->pageout_queue) {
4396                                         VM_PAGE_QUEUES_REMOVE(m);
4397
4398                                         assert(m->object != kernel_object);
4399
4400                                         VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4401                                 }
4402                         }
4403                         if (dwp->dw_mask & DW_clear_busy)
4404                                 m->busy = FALSE;
4405
4406                         if (dwp->dw_mask & DW_PAGE_WAKEUP)
4407                                 PAGE_WAKEUP(m);
4408                 }
4409         }
4410         vm_page_unlock_queues();
4411
4412         if (local_free_q)
4413                 vm_page_free_list(local_free_q, TRUE);
4414
4415         VM_CHECK_MEMORYSTATUS;
4416
4417 }
4418
4419 kern_return_t
4420 vm_page_alloc_list(
4421         int     page_count,
4422         int     flags,
4423         vm_page_t *list)
4424 {
4425         vm_page_t       lo_page_list = VM_PAGE_NULL;
4426         vm_page_t       mem;
4427         int             i;
4428
4429         if ( !(flags & KMA_LOMEM))
4430                 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4431
4432         for (i = 0; i < page_count; i++) {
4433
4434                 mem = vm_page_grablo();
4435
4436                 if (mem == VM_PAGE_NULL) {
4437                         if (lo_page_list)
4438                                 vm_page_free_list(lo_page_list, FALSE);
4439
4440                         *list = VM_PAGE_NULL;
4441
4442                         return (KERN_RESOURCE_SHORTAGE);
4443                 }
4444                 mem->pageq.next = (queue_entry_t) lo_page_list;
4445                 lo_page_list = mem;
4446         }
4447         *list = lo_page_list;
4448
4449         return (KERN_SUCCESS);
4450 }
4451
4452 void
4453 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4454 {
4455         page->offset = offset;
4456 }
4457
4458 vm_page_t
4459 vm_page_get_next(vm_page_t page)
4460 {
4461         return ((vm_page_t) page->pageq.next);
4462 }
4463
4464 vm_object_offset_t
4465 vm_page_get_offset(vm_page_t page)
4466 {
4467         return (page->offset);
4468 }
4469
4470 ppnum_t
4471 vm_page_get_phys_page(vm_page_t page)
4472 {
4473         return (page->phys_page);
4474 }
4475
4476
4477 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4478
4479 #if HIBERNATION
4480
4481 static vm_page_t hibernate_gobble_queue;
4482
4483 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4484
4485 static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4486 static int  hibernate_flush_dirty_pages(void);
4487 static int  hibernate_flush_queue(queue_head_t *, int);
4488
4489 void hibernate_flush_wait(void);
4490 void hibernate_mark_in_progress(void);
4491 void hibernate_clear_in_progress(void);
4492
4493
4494 struct hibernate_statistics {
4495         int hibernate_considered;
4496         int hibernate_reentered_on_q;
4497         int hibernate_found_dirty;
4498         int hibernate_skipped_cleaning;
4499         int hibernate_skipped_transient;
4500         int hibernate_skipped_precious;
4501         int hibernate_queue_nolock;
4502         int hibernate_queue_paused;
4503         int hibernate_throttled;
4504         int hibernate_throttle_timeout;
4505         int hibernate_drained;
4506         int hibernate_drain_timeout;
4507         int cd_lock_failed;
4508         int cd_found_precious;
4509         int cd_found_wired;
4510         int cd_found_busy;
4511         int cd_found_unusual;
4512         int cd_found_cleaning;
4513         int cd_found_laundry;
4514         int cd_found_dirty;
4515         int cd_local_free;
4516         int cd_total_free;
4517         int cd_vm_page_wire_count;
4518         int cd_pages;
4519         int cd_discarded;
4520         int cd_count_wire;
4521 } hibernate_stats;
4522
4523
4524
4525 static int
4526 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4527 {
4528         wait_result_t   wait_result;
4529
4530         vm_page_lock_queues();
4531
4532         while (q->pgo_laundry) {
4533
4534                 q->pgo_draining = TRUE;
4535
4536                 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4537
4538                 vm_page_unlock_queues();
4539
4540                 wait_result = thread_block(THREAD_CONTINUE_NULL);
4541
4542                 if (wait_result == THREAD_TIMED_OUT) {
4543                         hibernate_stats.hibernate_drain_timeout++;
4544                         return (1);
4545                 }
4546                 vm_page_lock_queues();
4547
4548                 hibernate_stats.hibernate_drained++;
4549         }
4550         vm_page_unlock_queues();
4551
4552         return (0);
4553 }
4554
4555
4556 static int
4557 hibernate_flush_queue(queue_head_t *q, int qcount)
4558 {
4559         vm_page_t       m;
4560         vm_object_t     l_object = NULL;
4561         vm_object_t     m_object = NULL;
4562         int             refmod_state = 0;
4563         int             try_failed_count = 0;
4564         int             retval = 0;
4565         int             current_run = 0;
4566         struct  vm_pageout_queue *iq;
4567         struct  vm_pageout_queue *eq;
4568         struct  vm_pageout_queue *tq;
4569
4570         hibernate_cleaning_in_progress = TRUE;
4571
4572         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4573
4574         iq = &vm_pageout_queue_internal;
4575         eq = &vm_pageout_queue_external;
4576
4577         vm_page_lock_queues();
4578
4579         while (qcount && !queue_empty(q)) {
4580
4581                 if (current_run++ == 1000) {
4582                         if (hibernate_should_abort()) {
4583                                 retval = 1;
4584                                 break;
4585                         }
4586                         current_run = 0;
4587                 }
4588
4589                 m = (vm_page_t) queue_first(q);
4590                 m_object = m->object;
4591
4592                 /*
4593                  * check to see if we currently are working
4594                  * with the same object... if so, we've
4595                  * already got the lock
4596                  */
4597                 if (m_object != l_object) {
4598                         /*
4599                          * the object associated with candidate page is
4600                          * different from the one we were just working
4601                          * with... dump the lock if we still own it
4602                          */
4603                         if (l_object != NULL) {
4604                                 vm_object_unlock(l_object);
4605                                 l_object = NULL;
4606                         }
4607                         /*
4608                          * Try to lock object; since we've alread got the
4609                          * page queues lock, we can only 'try' for this one.
4610                          * if the 'try' fails, we need to do a mutex_pause
4611                          * to allow the owner of the object lock a chance to
4612                          * run...
4613                          */
4614                         if ( !vm_object_lock_try_scan(m_object)) {
4615
4616                                 if (try_failed_count > 20) {
4617                                         hibernate_stats.hibernate_queue_nolock++;
4618
4619                                         goto reenter_pg_on_q;
4620                                 }
4621                                 vm_pageout_scan_wants_object = m_object;
4622
4623                                 vm_page_unlock_queues();
4624                                 mutex_pause(try_failed_count++);
4625                                 vm_page_lock_queues();
4626
4627                                 hibernate_stats.hibernate_queue_paused++;
4628                                 continue;
4629                         } else {
4630                                 l_object = m_object;
4631                                 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4632                         }
4633                 }
4634                 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4635                         /*
4636                          * page is not to be cleaned
4637                          * put it back on the head of its queue
4638                          */
4639                         if (m->cleaning)
4640                                 hibernate_stats.hibernate_skipped_cleaning++;
4641                         else
4642                                 hibernate_stats.hibernate_skipped_transient++;
4643
4644                         goto reenter_pg_on_q;
4645                 }
4646                 if ( !m_object->pager_initialized && m_object->pager_created)
4647                         goto reenter_pg_on_q;
4648
4649                 if (m_object->copy == VM_OBJECT_NULL) {
4650                         if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4651                                 /*
4652                                  * let the normal hibernate image path
4653                                  * deal with these
4654                                  */
4655                                 goto reenter_pg_on_q;
4656                         }
4657                 }
4658                 if ( !m->dirty && m->pmapped) {
4659                         refmod_state = pmap_get_refmod(m->phys_page);
4660
4661                         if ((refmod_state & VM_MEM_MODIFIED)) {
4662                                 SET_PAGE_DIRTY(m, FALSE);
4663                         }
4664                 } else
4665                         refmod_state = 0;
4666
4667                 if ( !m->dirty) {
4668                         /*
4669                          * page is not to be cleaned
4670                          * put it back on the head of its queue
4671                          */
4672                         if (m->precious)
4673                                 hibernate_stats.hibernate_skipped_precious++;
4674
4675                         goto reenter_pg_on_q;
4676                 }
4677                 tq = NULL;
4678
4679                 if (m_object->internal) {
4680                         if (VM_PAGE_Q_THROTTLED(iq))
4681                                 tq = iq;
4682                 } else if (VM_PAGE_Q_THROTTLED(eq))
4683                         tq = eq;
4684
4685                 if (tq != NULL) {
4686                         wait_result_t   wait_result;
4687                         int             wait_count = 5;
4688
4689                         if (l_object != NULL) {
4690                                 vm_object_unlock(l_object);
4691                                 l_object = NULL;
4692                         }
4693                         vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4694
4695                         tq->pgo_throttled = TRUE;
4696
4697                         while (retval == 0) {
4698
4699                                 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4700
4701                                 vm_page_unlock_queues();
4702
4703                                 wait_result = thread_block(THREAD_CONTINUE_NULL);
4704
4705                                 vm_page_lock_queues();
4706
4707                                 if (hibernate_should_abort())
4708                                         retval = 1;
4709
4710                                 if (wait_result != THREAD_TIMED_OUT)
4711                                         break;
4712
4713                                 if (--wait_count == 0) {
4714                                         hibernate_stats.hibernate_throttle_timeout++;
4715                                         retval = 1;
4716                                 }
4717                         }
4718                         if (retval)
4719                                 break;
4720
4721                         hibernate_stats.hibernate_throttled++;
4722
4723                         continue;
4724                 }
4725                 /*
4726                  * we've already factored out pages in the laundry which
4727                  * means this page can't be on the pageout queue so it's
4728                  * safe to do the VM_PAGE_QUEUES_REMOVE
4729                  */
4730                 assert(!m->pageout_queue);
4731
4732                 VM_PAGE_QUEUES_REMOVE(m);
4733
4734                 vm_pageout_cluster(m, FALSE);
4735
4736                 hibernate_stats.hibernate_found_dirty++;
4737
4738                 goto next_pg;
4739
4740 reenter_pg_on_q:
4741                 queue_remove(q, m, vm_page_t, pageq);
4742                 queue_enter(q, m, vm_page_t, pageq);
4743
4744                 hibernate_stats.hibernate_reentered_on_q++;
4745 next_pg:
4746                 hibernate_stats.hibernate_considered++;
4747
4748                 qcount--;
4749                 try_failed_count = 0;
4750         }
4751         if (l_object != NULL) {
4752                 vm_object_unlock(l_object);
4753                 l_object = NULL;
4754         }
4755         vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4756
4757         vm_page_unlock_queues();
4758
4759         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4760
4761         hibernate_cleaning_in_progress = FALSE;
4762
4763         return (retval);
4764 }
4765
4766
4767 static int
4768 hibernate_flush_dirty_pages()
4769 {
4770         struct vm_speculative_age_q     *aq;
4771         uint32_t        i;
4772
4773         bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4774
4775         if (vm_page_local_q) {
4776                 for (i = 0; i < vm_page_local_q_count; i++)
4777                         vm_page_reactivate_local(i, TRUE, FALSE);
4778         }
4779
4780         for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4781                 int             qcount;
4782                 vm_page_t       m;
4783
4784                 aq = &vm_page_queue_speculative[i];
4785
4786                 if (queue_empty(&aq->age_q))
4787                         continue;
4788                 qcount = 0;
4789
4790                 vm_page_lockspin_queues();
4791
4792                 queue_iterate(&aq->age_q,
4793                               m,
4794                               vm_page_t,
4795                               pageq)
4796                 {
4797                         qcount++;
4798                 }
4799                 vm_page_unlock_queues();
4800
4801                 if (qcount) {
4802                         if (hibernate_flush_queue(&aq->age_q, qcount))
4803                                 return (1);
4804                 }
4805         }
4806         if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4807                 return (1);
4808         if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
4809                 return (1);
4810         if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
4811                 return (1);
4812         if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
4813                 return (1);
4814
4815         if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4816                 return (1);
4817         return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4818 }
4819
4820
4821 extern void IOSleep(unsigned int);
4822 extern int sync_internal(void);
4823
4824 int
4825 hibernate_flush_memory()
4826 {
4827         int     retval;
4828
4829         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4830
4831         IOSleep(2 * 1000);
4832
4833         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4834
4835         if ((retval = hibernate_flush_dirty_pages()) == 0) {
4836                 if (consider_buffer_cache_collect != NULL) {
4837
4838                         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4839
4840                         sync_internal();
4841                         (void)(*consider_buffer_cache_collect)(1);
4842                         consider_zone_gc(TRUE);
4843
4844                         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4845                 }
4846         }
4847         KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4848
4849     HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4850                 hibernate_stats.hibernate_considered,
4851                 hibernate_stats.hibernate_reentered_on_q,
4852                 hibernate_stats.hibernate_found_dirty);
4853     HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4854                 hibernate_stats.hibernate_skipped_cleaning,
4855                 hibernate_stats.hibernate_skipped_transient,
4856                 hibernate_stats.hibernate_skipped_precious,
4857                 hibernate_stats.hibernate_queue_nolock);
4858     HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4859                 hibernate_stats.hibernate_queue_paused,
4860                 hibernate_stats.hibernate_throttled,
4861                 hibernate_stats.hibernate_throttle_timeout,
4862                 hibernate_stats.hibernate_drained,
4863                 hibernate_stats.hibernate_drain_timeout);
4864
4865         return (retval);
4866 }
4867
4868
4869 static void
4870 hibernate_page_list_zero(hibernate_page_list_t *list)
4871 {
4872     uint32_t             bank;
4873     hibernate_bitmap_t * bitmap;
4874
4875     bitmap = &list->bank_bitmap[0];
4876     for (bank = 0; bank < list->bank_count; bank++)
4877     {
4878         uint32_t last_bit;
4879
4880         bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4881         // set out-of-bound bits at end of bitmap.
4882         last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4883         if (last_bit)
4884             bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4885
4886         bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4887     }
4888 }
4889
4890 void
4891 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4892 {
4893     uint32_t i;
4894     vm_page_t m;
4895     uint64_t start, end, timeout, nsec;
4896     clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4897     clock_get_uptime(&start);
4898
4899     for (i = 0; i < gobble_count; i++)
4900     {
4901         while (VM_PAGE_NULL == (m = vm_page_grab()))
4902         {
4903             clock_get_uptime(&end);
4904             if (end >= timeout)
4905                 break;
4906             VM_PAGE_WAIT();
4907         }
4908         if (!m)
4909             break;
4910         m->busy = FALSE;
4911         vm_page_gobble(m);
4912
4913         m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4914         hibernate_gobble_queue = m;
4915     }
4916
4917     clock_get_uptime(&end);
4918     absolutetime_to_nanoseconds(end - start, &nsec);
4919     HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4920 }
4921
4922 void
4923 hibernate_free_gobble_pages(void)
4924 {
4925     vm_page_t m, next;
4926     uint32_t  count = 0;
4927
4928     m = (vm_page_t) hibernate_gobble_queue;
4929     while(m)
4930     {
4931         next = (vm_page_t) m->pageq.next;
4932         vm_page_free(m);
4933         count++;
4934         m = next;
4935     }
4936     hibernate_gobble_queue = VM_PAGE_NULL;
4937
4938     if (count)
4939         HIBLOG("Freed %d pages\n", count);
4940 }
4941
4942 static boolean_t
4943 hibernate_consider_discard(vm_page_t m)
4944 {
4945     vm_object_t object = NULL;
4946     int                  refmod_state;
4947     boolean_t            discard = FALSE;
4948
4949     do
4950     {
4951         if (m->private)
4952             panic("hibernate_consider_discard: private");
4953
4954         if (!vm_object_lock_try(m->object)) {
4955             hibernate_stats.cd_lock_failed++;
4956             break;
4957         }
4958         object = m->object;
4959
4960         if (VM_PAGE_WIRED(m)) {
4961             hibernate_stats.cd_found_wired++;
4962             break;
4963         }
4964         if (m->precious) {
4965             hibernate_stats.cd_found_precious++;
4966             break;
4967         }
4968         if (m->busy || !object->alive) {
4969            /*
4970             *   Somebody is playing with this page.
4971             */
4972             hibernate_stats.cd_found_busy++;
4973             break;
4974         }
4975         if (m->absent || m->unusual || m->error) {
4976            /*
4977             * If it's unusual in anyway, ignore it
4978             */
4979             hibernate_stats.cd_found_unusual++;
4980             break;
4981         }
4982         if (m->cleaning) {
4983             hibernate_stats.cd_found_cleaning++;
4984             break;
4985         }
4986         if (m->laundry) {
4987             hibernate_stats.cd_found_laundry++;
4988             break;
4989         }
4990         if (!m->dirty)
4991         {
4992             refmod_state = pmap_get_refmod(m->phys_page);
4993
4994             if (refmod_state & VM_MEM_REFERENCED)
4995                 m->reference = TRUE;
4996             if (refmod_state & VM_MEM_MODIFIED) {
4997                 SET_PAGE_DIRTY(m, FALSE);
4998             }
4999         }
5000
5001         /*
5002          * If it's clean or purgeable we can discard the page on wakeup.
5003          */
5004         discard = (!m->dirty)
5005                     || (VM_PURGABLE_VOLATILE == object->purgable)
5006                     || (VM_PURGABLE_EMPTY    == object->purgable);
5007
5008         if (discard == FALSE)
5009             hibernate_stats.cd_found_dirty++;
5010     }
5011     while (FALSE);
5012
5013     if (object)
5014         vm_object_unlock(object);
5015
5016     return (discard);
5017 }
5018
5019
5020 static void
5021 hibernate_discard_page(vm_page_t m)
5022 {
5023     if (m->absent || m->unusual || m->error)
5024        /*
5025         * If it's unusual in anyway, ignore
5026         */
5027         return;
5028
5029 #if DEBUG
5030     vm_object_t object = m->object;
5031     if (!vm_object_lock_try(m->object))
5032         panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5033 #else
5034     /* No need to lock page queue for token delete, hibernate_vm_unlock()
5035        makes sure these locks are uncontended before sleep */
5036 #endif  /* !DEBUG */
5037
5038     if (m->pmapped == TRUE)
5039     {
5040         __unused int refmod_state = pmap_disconnect(m->phys_page);
5041     }
5042
5043     if (m->laundry)
5044         panic("hibernate_discard_page(%p) laundry", m);
5045     if (m->private)
5046         panic("hibernate_discard_page(%p) private", m);
5047     if (m->fictitious)
5048         panic("hibernate_discard_page(%p) fictitious", m);
5049
5050     if (VM_PURGABLE_VOLATILE == m->object->purgable)
5051     {
5052         /* object should be on a queue */
5053         assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5054         purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5055         assert(old_queue);
5056         vm_purgeable_token_delete_first(old_queue);
5057         m->object->purgable = VM_PURGABLE_EMPTY;
5058     }
5059
5060     vm_page_free(m);
5061
5062 #if DEBUG
5063     vm_object_unlock(object);
5064 #endif  /* DEBUG */
5065 }
5066
5067 /*
5068  Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5069  pages known to VM to not need saving are subtracted.
5070  Wired pages to be saved are present in page_list_wired, pageable in page_list.
5071 */
5072
5073 void
5074 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5075                            hibernate_page_list_t * page_list_wired,
5076                            hibernate_page_list_t * page_list_pal,
5077                            uint32_t * pagesOut)
5078 {
5079     uint64_t start, end, nsec;
5080     vm_page_t m;
5081     uint32_t pages = page_list->page_count;
5082     uint32_t count_zf = 0, count_throttled = 0;
5083     uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5084     uint32_t count_wire = pages;
5085     uint32_t count_discard_active    = 0;
5086     uint32_t count_discard_inactive  = 0;
5087     uint32_t count_discard_cleaned   = 0;
5088     uint32_t count_discard_purgeable = 0;
5089     uint32_t count_discard_speculative = 0;
5090     uint32_t i;
5091     uint32_t             bank;
5092     hibernate_bitmap_t * bitmap;
5093     hibernate_bitmap_t * bitmap_wired;
5094
5095
5096     HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5097
5098 #if DEBUG
5099         vm_page_lock_queues();
5100         if (vm_page_local_q) {
5101             for (i = 0; i < vm_page_local_q_count; i++) {
5102                 struct vpl      *lq;
5103                 lq = &vm_page_local_q[i].vpl_un.vpl;
5104                 VPL_LOCK(&lq->vpl_lock);
5105             }
5106         }
5107 #endif  /* DEBUG */
5108
5109
5110     KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5111
5112     clock_get_uptime(&start);
5113
5114     hibernate_page_list_zero(page_list);
5115     hibernate_page_list_zero(page_list_wired);
5116     hibernate_page_list_zero(page_list_pal);
5117
5118     hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5119     hibernate_stats.cd_pages = pages;
5120
5121     if (vm_page_local_q) {
5122             for (i = 0; i < vm_page_local_q_count; i++)
5123                     vm_page_reactivate_local(i, TRUE, TRUE);
5124     }
5125
5126     m = (vm_page_t) hibernate_gobble_queue;
5127     while(m)
5128     {
5129         pages--;
5130         count_wire--;
5131         hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5132         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5133         m = (vm_page_t) m->pageq.next;
5134     }
5135
5136     for( i = 0; i < real_ncpus; i++ )
5137     {
5138         if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5139         {
5140             for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5141             {
5142                 pages--;
5143                 count_wire--;
5144                 hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5145                 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5146
5147                 hibernate_stats.cd_local_free++;
5148                 hibernate_stats.cd_total_free++;
5149             }
5150         }
5151     }
5152
5153     for( i = 0; i < vm_colors; i++ )
5154     {
5155         queue_iterate(&vm_page_queue_free[i],
5156                       m,
5157                       vm_page_t,
5158                       pageq)
5159         {
5160             pages--;
5161             count_wire--;
5162             hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5163             hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5164
5165             hibernate_stats.cd_total_free++;
5166         }
5167     }
5168
5169     queue_iterate(&vm_lopage_queue_free,
5170                   m,
5171                   vm_page_t,
5172                   pageq)
5173     {
5174         pages--;
5175         count_wire--;
5176         hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5177         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5178
5179         hibernate_stats.cd_total_free++;
5180     }
5181
5182     queue_iterate( &vm_page_queue_throttled,
5183                     m,
5184                     vm_page_t,
5185                     pageq )
5186     {
5187         if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5188          && hibernate_consider_discard(m))
5189         {
5190             hibernate_page_bitset(page_list, TRUE, m->phys_page);
5191             count_discard_inactive++;
5192         }
5193         else
5194             count_throttled++;
5195         count_wire--;
5196         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5197     }
5198
5199     queue_iterate( &vm_page_queue_anonymous,
5200                     m,
5201                     vm_page_t,
5202                    pageq )
5203     {
5204         if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5205          && hibernate_consider_discard(m))
5206         {
5207             hibernate_page_bitset(page_list, TRUE, m->phys_page);
5208             if (m->dirty)
5209                 count_discard_purgeable++;
5210             else
5211                 count_discard_inactive++;
5212         }
5213         else
5214             count_zf++;
5215         count_wire--;
5216         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5217     }
5218
5219     queue_iterate( &vm_page_queue_inactive,
5220                     m,
5221                     vm_page_t,
5222                     pageq )
5223     {
5224         if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5225          && hibernate_consider_discard(m))
5226         {
5227             hibernate_page_bitset(page_list, TRUE, m->phys_page);
5228             if (m->dirty)
5229                 count_discard_purgeable++;
5230             else
5231                 count_discard_inactive++;
5232         }
5233         else
5234             count_inactive++;
5235         count_wire--;
5236         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5237     }
5238
5239     queue_iterate( &vm_page_queue_cleaned,
5240                     m,
5241                     vm_page_t,
5242                     pageq )
5243     {
5244         if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5245          && hibernate_consider_discard(m))
5246         {
5247             hibernate_page_bitset(page_list, TRUE, m->phys_page);
5248             if (m->dirty)
5249                 count_discard_purgeable++;
5250             else
5251                 count_discard_cleaned++;
5252         }
5253         else
5254             count_cleaned++;
5255         count_wire--;
5256         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5257     }
5258
5259     for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5260     {
5261        queue_iterate(&vm_page_queue_speculative[i].age_q,
5262                      m,
5263                      vm_page_t,
5264                      pageq)
5265        {
5266            if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5267             && hibernate_consider_discard(m))
5268            {
5269                hibernate_page_bitset(page_list, TRUE, m->phys_page);
5270                count_discard_speculative++;
5271            }
5272            else
5273                count_speculative++;
5274            count_wire--;
5275            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5276        }
5277     }
5278
5279     queue_iterate( &vm_page_queue_active,
5280                     m,
5281                     vm_page_t,
5282                     pageq )
5283     {
5284         if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5285          && hibernate_consider_discard(m))
5286         {
5287             hibernate_page_bitset(page_list, TRUE, m->phys_page);
5288             if (m->dirty)
5289                 count_discard_purgeable++;
5290             else
5291                 count_discard_active++;
5292         }
5293         else
5294             count_active++;
5295         count_wire--;
5296         hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5297     }
5298
5299     // pull wired from hibernate_bitmap
5300
5301     bitmap = &page_list->bank_bitmap[0];
5302     bitmap_wired = &page_list_wired->bank_bitmap[0];
5303     for (bank = 0; bank < page_list->bank_count; bank++)
5304     {
5305         for (i = 0; i < bitmap->bitmapwords; i++)
5306             bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5307         bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
5308         bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5309     }
5310
5311     // machine dependent adjustments
5312     hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5313
5314     hibernate_stats.cd_count_wire = count_wire;
5315     hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
5316
5317     clock_get_uptime(&end);
5318     absolutetime_to_nanoseconds(end - start, &nsec);
5319     HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5320
5321     HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5322                 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
5323                 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5324
5325     *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5326
5327 #if DEBUG
5328         if (vm_page_local_q) {
5329             for (i = 0; i < vm_page_local_q_count; i++) {
5330                 struct vpl      *lq;
5331                 lq = &vm_page_local_q[i].vpl_un.vpl;
5332                 VPL_UNLOCK(&lq->vpl_lock);
5333             }
5334         }
5335         vm_page_unlock_queues();
5336 #endif  /* DEBUG */
5337
5338     KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5339 }
5340
5341 void
5342 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5343 {
5344     uint64_t  start, end, nsec;
5345     vm_page_t m;
5346     vm_page_t next;
5347     uint32_t  i;
5348     uint32_t  count_discard_active    = 0;
5349     uint32_t  count_discard_inactive  = 0;
5350     uint32_t  count_discard_purgeable = 0;
5351     uint32_t  count_discard_cleaned   = 0;
5352     uint32_t  count_discard_speculative = 0;
5353
5354 #if DEBUG
5355         vm_page_lock_queues();
5356         if (vm_page_local_q) {
5357             for (i = 0; i < vm_page_local_q_count; i++) {
5358                 struct vpl      *lq;
5359                 lq = &vm_page_local_q[i].vpl_un.vpl;
5360                 VPL_LOCK(&lq->vpl_lock);
5361             }
5362         }
5363 #endif  /* DEBUG */
5364
5365     clock_get_uptime(&start);
5366
5367     m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5368     while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5369     {
5370         next = (vm_page_t) m->pageq.next;
5371         if (hibernate_page_bittst(page_list, m->phys_page))
5372         {
5373             if (m->dirty)
5374                 count_discard_purgeable++;
5375             else
5376                 count_discard_inactive++;
5377             hibernate_discard_page(m);
5378         }
5379         m = next;
5380     }
5381
5382     for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5383     {
5384        m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5385        while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5386        {
5387            next = (vm_page_t) m->pageq.next;
5388            if (hibernate_page_bittst(page_list, m->phys_page))
5389            {
5390                count_discard_speculative++;
5391                hibernate_discard_page(m);
5392            }
5393            m = next;
5394        }
5395     }
5396
5397     m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5398     while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5399     {
5400         next = (vm_page_t) m->pageq.next;
5401         if (hibernate_page_bittst(page_list, m->phys_page))
5402         {
5403             if (m->dirty)
5404                 count_discard_purgeable++;
5405             else
5406                 count_discard_inactive++;
5407             hibernate_discard_page(m);
5408         }
5409         m = next;
5410     }
5411
5412     m = (vm_page_t) queue_first(&vm_page_queue_active);
5413     while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5414     {
5415         next = (vm_page_t) m->pageq.next;
5416         if (hibernate_page_bittst(page_list, m->phys_page))
5417         {
5418             if (m->dirty)
5419                 count_discard_purgeable++;
5420             else
5421                 count_discard_active++;
5422             hibernate_discard_page(m);
5423         }
5424         m = next;
5425     }
5426
5427     m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5428     while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5429     {
5430         next = (vm_page_t) m->pageq.next;
5431         if (hibernate_page_bittst(page_list, m->phys_page))
5432         {
5433             if (m->dirty)
5434                 count_discard_purgeable++;
5435             else
5436                 count_discard_cleaned++;
5437             hibernate_discard_page(m);
5438         }
5439         m = next;
5440     }
5441
5442 #if DEBUG
5443         if (vm_page_local_q) {
5444             for (i = 0; i < vm_page_local_q_count; i++) {
5445                 struct vpl      *lq;
5446                 lq = &vm_page_local_q[i].vpl_un.vpl;
5447                 VPL_UNLOCK(&lq->vpl_lock);
5448             }
5449         }
5450         vm_page_unlock_queues();
5451 #endif  /* DEBUG */
5452
5453     clock_get_uptime(&end);
5454     absolutetime_to_nanoseconds(end - start, &nsec);
5455     HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5456                 nsec / 1000000ULL,
5457                 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5458 }
5459
5460 #endif /* HIBERNATION */
5461
5462 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5463
5464 #include <mach_vm_debug.h>
5465 #if     MACH_VM_DEBUG
5466
5467 #include <mach_debug/hash_info.h>
5468 #include <vm/vm_debug.h>
5469
5470 /*
5471  *      Routine:        vm_page_info
5472  *      Purpose:
5473  *              Return information about the global VP table.
5474  *              Fills the buffer with as much information as possible
5475  *              and returns the desired size of the buffer.
5476  *      Conditions:
5477  *              Nothing locked.  The caller should provide
5478  *              possibly-pageable memory.
5479  */
5480
5481 unsigned int
5482 vm_page_info(
5483         hash_info_bucket_t *info,
5484         unsigned int count)
5485 {
5486         unsigned int i;
5487         lck_spin_t      *bucket_lock;
5488
5489         if (vm_page_bucket_count < count)
5490                 count = vm_page_bucket_count;
5491
5492         for (i = 0; i < count; i++) {
5493                 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5494                 unsigned int bucket_count = 0;
5495                 vm_page_t m;
5496
5497                 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5498                 lck_spin_lock(bucket_lock);
5499
5500                 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5501                         bucket_count++;
5502
5503                 lck_spin_unlock(bucket_lock);
5504
5505                 /* don't touch pageable memory while holding locks */
5506                 info[i].hib_count = bucket_count;
5507         }
5508
5509         return vm_page_bucket_count;
5510 }
5511 #endif  /* MACH_VM_DEBUG */