osfmk/vm/vm_resident.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_page.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *
  62  *      Resident memory management module.
  63  */
  64
  65 #include <debug.h>
  66 #include <libkern/OSAtomic.h>
  67
  68 #include <mach/clock_types.h>
  69 #include <mach/vm_prot.h>
  70 #include <mach/vm_statistics.h>
  71 #include <mach/sdt.h>
  72 #include <kern/counters.h>
  73 #include <kern/sched_prim.h>
  74 #include <kern/task.h>
  75 #include <kern/thread.h>
  76 #include <kern/zalloc.h>
  77 #include <kern/xpr.h>
  78 #include <vm/pmap.h>
  79 #include <vm/vm_init.h>
  80 #include <vm/vm_map.h>
  81 #include <vm/vm_page.h>
  82 #include <vm/vm_pageout.h>
  83 #include <vm/vm_kern.h>                 /* kernel_memory_allocate() */
  84 #include <kern/misc_protos.h>
  85 #include <zone_debug.h>
  86 #include <vm/cpm.h>
  87 #include <ppc/mappings.h>               /* (BRINGUP) */
  88 #include <pexpert/pexpert.h>    /* (BRINGUP) */
  89
  90 #include <vm/vm_protos.h>
  91 #include <vm/memory_object.h>
  92 #include <vm/vm_purgeable_internal.h>
  93
  94 #if CONFIG_EMBEDDED
  95 #include <sys/kern_memorystatus.h>
  96 #endif
  97
  98 int                     speculative_age_index = 0;
  99 int                     speculative_steal_index = 0;
 100
 101 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
 102
 103 static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
 104
 105
 106 /*
 107  *      Associated with page of user-allocatable memory is a
 108  *      page structure.
 109  */
 110
 111 /*
 112  *      These variables record the values returned by vm_page_bootstrap,
 113  *      for debugging purposes.  The implementation of pmap_steal_memory
 114  *      and pmap_startup here also uses them internally.
 115  */
 116
 117 vm_offset_t virtual_space_start;
 118 vm_offset_t virtual_space_end;
 119 int     vm_page_pages;
 120
 121 /*
 122  *      The vm_page_lookup() routine, which provides for fast
 123  *      (virtual memory object, offset) to page lookup, employs
 124  *      the following hash table.  The vm_page_{insert,remove}
 125  *      routines install and remove associations in the table.
 126  *      [This table is often called the virtual-to-physical,
 127  *      or VP, table.]
 128  */
 129 typedef struct {
 130         vm_page_t       pages;
 131 #if     MACH_PAGE_HASH_STATS
 132         int             cur_count;              /* current count */
 133         int             hi_count;               /* high water mark */
 134 #endif /* MACH_PAGE_HASH_STATS */
 135 } vm_page_bucket_t;
 136
 137 vm_page_bucket_t *vm_page_buckets;              /* Array of buckets */
 138 unsigned int    vm_page_bucket_count = 0;       /* How big is array? */
 139 unsigned int    vm_page_hash_mask;              /* Mask for hash function */
 140 unsigned int    vm_page_hash_shift;             /* Shift for hash function */
 141 uint32_t        vm_page_bucket_hash;            /* Basic bucket hash */
 142 decl_simple_lock_data(,vm_page_bucket_lock)
 143
 144
 145 #if     MACH_PAGE_HASH_STATS
 146 /* This routine is only for debug.  It is intended to be called by
 147  * hand by a developer using a kernel debugger.  This routine prints
 148  * out vm_page_hash table statistics to the kernel debug console.
 149  */
 150 void
 151 hash_debug(void)
 152 {
 153         int     i;
 154         int     numbuckets = 0;
 155         int     highsum = 0;
 156         int     maxdepth = 0;
 157
 158         for (i = 0; i < vm_page_bucket_count; i++) {
 159                 if (vm_page_buckets[i].hi_count) {
 160                         numbuckets++;
 161                         highsum += vm_page_buckets[i].hi_count;
 162                         if (vm_page_buckets[i].hi_count > maxdepth)
 163                                 maxdepth = vm_page_buckets[i].hi_count;
 164                 }
 165         }
 166         printf("Total number of buckets: %d\n", vm_page_bucket_count);
 167         printf("Number used buckets:     %d = %d%%\n",
 168                 numbuckets, 100*numbuckets/vm_page_bucket_count);
 169         printf("Number unused buckets:   %d = %d%%\n",
 170                 vm_page_bucket_count - numbuckets,
 171                 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
 172         printf("Sum of bucket max depth: %d\n", highsum);
 173         printf("Average bucket depth:    %d.%2d\n",
 174                 highsum/vm_page_bucket_count,
 175                 highsum%vm_page_bucket_count);
 176         printf("Maximum bucket depth:    %d\n", maxdepth);
 177 }
 178 #endif /* MACH_PAGE_HASH_STATS */
 179
 180 /*
 181  *      The virtual page size is currently implemented as a runtime
 182  *      variable, but is constant once initialized using vm_set_page_size.
 183  *      This initialization must be done in the machine-dependent
 184  *      bootstrap sequence, before calling other machine-independent
 185  *      initializations.
 186  *
 187  *      All references to the virtual page size outside this
 188  *      module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 189  *      constants.
 190  */
 191 vm_size_t       page_size  = PAGE_SIZE;
 192 vm_size_t       page_mask  = PAGE_MASK;
 193 int             page_shift = PAGE_SHIFT;
 194
 195 /*
 196  *      Resident page structures are initialized from
 197  *      a template (see vm_page_alloc).
 198  *
 199  *      When adding a new field to the virtual memory
 200  *      object structure, be sure to add initialization
 201  *      (see vm_page_bootstrap).
 202  */
 203 struct vm_page  vm_page_template;
 204
 205 vm_page_t       vm_pages = VM_PAGE_NULL;
 206 unsigned int    vm_pages_count = 0;
 207
 208 /*
 209  *      Resident pages that represent real memory
 210  *      are allocated from a set of free lists,
 211  *      one per color.
 212  */
 213 unsigned int    vm_colors;
 214 unsigned int    vm_color_mask;                  /* mask is == (vm_colors-1) */
 215 unsigned int    vm_cache_geometry_colors = 0;   /* set by hw dependent code during startup */
 216 queue_head_t    vm_page_queue_free[MAX_COLORS];
 217 vm_page_t       vm_page_queue_fictitious;
 218 unsigned int    vm_page_free_wanted;
 219 unsigned int    vm_page_free_wanted_privileged;
 220 unsigned int    vm_page_free_count;
 221 unsigned int    vm_page_fictitious_count;
 222
 223 unsigned int    vm_page_free_count_minimum;     /* debugging */
 224
 225 /*
 226  *      Occasionally, the virtual memory system uses
 227  *      resident page structures that do not refer to
 228  *      real pages, for example to leave a page with
 229  *      important state information in the VP table.
 230  *
 231  *      These page structures are allocated the way
 232  *      most other kernel structures are.
 233  */
 234 zone_t  vm_page_zone;
 235 decl_mutex_data(,vm_page_alloc_lock)
 236 unsigned int io_throttle_zero_fill;
 237
 238 /*
 239  *      Fictitious pages don't have a physical address,
 240  *      but we must initialize phys_page to something.
 241  *      For debugging, this should be a strange value
 242  *      that the pmap module can recognize in assertions.
 243  */
 244 vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
 245
 246 /*
 247  *      Guard pages are not accessible so they don't
 248  *      need a physical address, but we need to enter
 249  *      one in the pmap.
 250  *      Let's make it recognizable and make sure that
 251  *      we don't use a real physical page with that
 252  *      physical address.
 253  */
 254 vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
 255
 256 /*
 257  *      Resident page structures are also chained on
 258  *      queues that are used by the page replacement
 259  *      system (pageout daemon).  These queues are
 260  *      defined here, but are shared by the pageout
 261  *      module.  The inactive queue is broken into
 262  *      inactive and zf for convenience as the
 263  *      pageout daemon often assignes a higher
 264  *      affinity to zf pages
 265  */
 266 queue_head_t    vm_page_queue_active;
 267 queue_head_t    vm_page_queue_inactive;
 268 queue_head_t    vm_page_queue_zf;       /* inactive memory queue for zero fill */
 269
 270 unsigned int    vm_page_active_count;
 271 unsigned int    vm_page_inactive_count;
 272 unsigned int    vm_page_throttled_count;
 273 unsigned int    vm_page_speculative_count;
 274 unsigned int    vm_page_wire_count;
 275 unsigned int    vm_page_gobble_count = 0;
 276 unsigned int    vm_page_wire_count_warning = 0;
 277 unsigned int    vm_page_gobble_count_warning = 0;
 278
 279 unsigned int    vm_page_purgeable_count = 0; /* # of pages purgeable now */
 280 uint64_t        vm_page_purged_count = 0;    /* total count of purged pages */
 281
 282 unsigned int    vm_page_speculative_recreated = 0;
 283 unsigned int    vm_page_speculative_created = 0;
 284 unsigned int    vm_page_speculative_used = 0;
 285
 286 ppnum_t         vm_lopage_poolstart = 0;
 287 ppnum_t         vm_lopage_poolend = 0;
 288 int             vm_lopage_poolsize = 0;
 289 uint64_t        max_valid_dma_address = 0xffffffffffffffffULL;
 290
 291
 292 /*
 293  *      Several page replacement parameters are also
 294  *      shared with this module, so that page allocation
 295  *      (done here in vm_page_alloc) can trigger the
 296  *      pageout daemon.
 297  */
 298 unsigned int    vm_page_free_target = 0;
 299 unsigned int    vm_page_free_min = 0;
 300 unsigned int    vm_page_inactive_target = 0;
 301 unsigned int    vm_page_inactive_min = 0;
 302 unsigned int    vm_page_free_reserved = 0;
 303 unsigned int    vm_page_zfill_throttle_count = 0;
 304
 305 /*
 306  *      The VM system has a couple of heuristics for deciding
 307  *      that pages are "uninteresting" and should be placed
 308  *      on the inactive queue as likely candidates for replacement.
 309  *      These variables let the heuristics be controlled at run-time
 310  *      to make experimentation easier.
 311  */
 312
 313 boolean_t vm_page_deactivate_hint = TRUE;
 314
 315 /*
 316  *      vm_set_page_size:
 317  *
 318  *      Sets the page size, perhaps based upon the memory
 319  *      size.  Must be called before any use of page-size
 320  *      dependent functions.
 321  *
 322  *      Sets page_shift and page_mask from page_size.
 323  */
 324 void
 325 vm_set_page_size(void)
 326 {
 327         page_mask = page_size - 1;
 328
 329         if ((page_mask & page_size) != 0)
 330                 panic("vm_set_page_size: page size not a power of two");
 331
 332         for (page_shift = 0; ; page_shift++)
 333                 if ((1U << page_shift) == page_size)
 334                         break;
 335 }
 336
 337
 338 /* Called once during statup, once the cache geometry is known.
 339  */
 340 static void
 341 vm_page_set_colors( void )
 342 {
 343         unsigned int    n, override;
 344
 345         if ( PE_parse_boot_arg("colors", &override) )           /* colors specified as a boot-arg? */
 346                 n = override;
 347         else if ( vm_cache_geometry_colors )                    /* do we know what the cache geometry is? */
 348                 n = vm_cache_geometry_colors;
 349         else    n = DEFAULT_COLORS;                             /* use default if all else fails */
 350
 351         if ( n == 0 )
 352                 n = 1;
 353         if ( n > MAX_COLORS )
 354                 n = MAX_COLORS;
 355
 356         /* the count must be a power of 2  */
 357         if ( ( n & (n - 1)) !=0  )
 358                 panic("vm_page_set_colors");
 359
 360         vm_colors = n;
 361         vm_color_mask = n - 1;
 362 }
 363
 364
 365 /*
 366  *      vm_page_bootstrap:
 367  *
 368  *      Initializes the resident memory module.
 369  *
 370  *      Allocates memory for the page cells, and
 371  *      for the object/offset-to-page hash table headers.
 372  *      Each page cell is initialized and placed on the free list.
 373  *      Returns the range of available kernel virtual memory.
 374  */
 375
 376 void
 377 vm_page_bootstrap(
 378         vm_offset_t             *startp,
 379         vm_offset_t             *endp)
 380 {
 381         register vm_page_t      m;
 382         unsigned int            i;
 383         unsigned int            log1;
 384         unsigned int            log2;
 385         unsigned int            size;
 386
 387         /*
 388          *      Initialize the vm_page template.
 389          */
 390
 391         m = &vm_page_template;
 392         m->object = VM_OBJECT_NULL;             /* reset later */
 393         m->offset = (vm_object_offset_t) -1;    /* reset later */
 394         m->wire_count = 0;
 395
 396         m->pageq.next = NULL;
 397         m->pageq.prev = NULL;
 398         m->listq.next = NULL;
 399         m->listq.prev = NULL;
 400
 401         m->speculative = FALSE;
 402         m->throttled = FALSE;
 403         m->inactive = FALSE;
 404         m->active = FALSE;
 405         m->no_cache = FALSE;
 406         m->laundry = FALSE;
 407         m->free = FALSE;
 408         m->pmapped = FALSE;
 409         m->reference = FALSE;
 410         m->pageout = FALSE;
 411         m->dump_cleaning = FALSE;
 412         m->list_req_pending = FALSE;
 413
 414         m->busy = TRUE;
 415         m->wanted = FALSE;
 416         m->tabled = FALSE;
 417         m->fictitious = FALSE;
 418         m->private = FALSE;
 419         m->absent = FALSE;
 420         m->error = FALSE;
 421         m->dirty = FALSE;
 422         m->cleaning = FALSE;
 423         m->precious = FALSE;
 424         m->clustered = FALSE;
 425         m->unusual = FALSE;
 426         m->restart = FALSE;
 427         m->zero_fill = FALSE;
 428         m->encrypted = FALSE;
 429         m->encrypted_cleaning = FALSE;
 430         m->deactivated = FALSE;
 431
 432         m->phys_page = 0;               /* reset later */
 433
 434         /*
 435          *      Initialize the page queues.
 436          */
 437
 438         mutex_init(&vm_page_queue_free_lock, 0);
 439         mutex_init(&vm_page_queue_lock, 0);
 440
 441         mutex_init(&vm_purgeable_queue_lock, 0);
 442
 443         for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
 444                 int group;
 445
 446                 purgeable_queues[i].token_q_head = 0;
 447                 purgeable_queues[i].token_q_tail = 0;
 448                 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
 449                         queue_init(&purgeable_queues[i].objq[group]);
 450
 451                 purgeable_queues[i].type = i;
 452                 purgeable_queues[i].new_pages = 0;
 453 #if MACH_ASSERT
 454                 purgeable_queues[i].debug_count_tokens = 0;
 455                 purgeable_queues[i].debug_count_objects = 0;
 456 #endif
 457         };
 458
 459         for (i = 0; i < MAX_COLORS; i++ )
 460                 queue_init(&vm_page_queue_free[i]);
 461         queue_init(&vm_lopage_queue_free);
 462         vm_page_queue_fictitious = VM_PAGE_NULL;
 463         queue_init(&vm_page_queue_active);
 464         queue_init(&vm_page_queue_inactive);
 465         queue_init(&vm_page_queue_throttled);
 466         queue_init(&vm_page_queue_zf);
 467
 468         for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
 469                 queue_init(&vm_page_queue_speculative[i].age_q);
 470
 471                 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
 472                 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
 473         }
 474         vm_page_free_wanted = 0;
 475         vm_page_free_wanted_privileged = 0;
 476
 477         vm_page_set_colors();
 478
 479
 480         /*
 481          *      Steal memory for the map and zone subsystems.
 482          */
 483
 484         vm_map_steal_memory();
 485         zone_steal_memory();
 486
 487         /*
 488          *      Allocate (and initialize) the virtual-to-physical
 489          *      table hash buckets.
 490          *
 491          *      The number of buckets should be a power of two to
 492          *      get a good hash function.  The following computation
 493          *      chooses the first power of two that is greater
 494          *      than the number of physical pages in the system.
 495          */
 496
 497         simple_lock_init(&vm_page_bucket_lock, 0);
 498
 499         if (vm_page_bucket_count == 0) {
 500                 unsigned int npages = pmap_free_pages();
 501
 502                 vm_page_bucket_count = 1;
 503                 while (vm_page_bucket_count < npages)
 504                         vm_page_bucket_count <<= 1;
 505         }
 506
 507         vm_page_hash_mask = vm_page_bucket_count - 1;
 508
 509         /*
 510          *      Calculate object shift value for hashing algorithm:
 511          *              O = log2(sizeof(struct vm_object))
 512          *              B = log2(vm_page_bucket_count)
 513          *              hash shifts the object left by
 514          *              B/2 - O
 515          */
 516         size = vm_page_bucket_count;
 517         for (log1 = 0; size > 1; log1++)
 518                 size /= 2;
 519         size = sizeof(struct vm_object);
 520         for (log2 = 0; size > 1; log2++)
 521                 size /= 2;
 522         vm_page_hash_shift = log1/2 - log2 + 1;
 523
 524         vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);           /* Get (ceiling of sqrt of table size) */
 525         vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);          /* Get (ceiling of quadroot of table size) */
 526         vm_page_bucket_hash |= 1;                                                       /* Set bit and add 1 - always must be 1 to insure unique series */
 527
 528         if (vm_page_hash_mask & vm_page_bucket_count)
 529                 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
 530
 531         vm_page_buckets = (vm_page_bucket_t *)
 532                 pmap_steal_memory(vm_page_bucket_count *
 533                                   sizeof(vm_page_bucket_t));
 534
 535         for (i = 0; i < vm_page_bucket_count; i++) {
 536                 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
 537
 538                 bucket->pages = VM_PAGE_NULL;
 539 #if     MACH_PAGE_HASH_STATS
 540                 bucket->cur_count = 0;
 541                 bucket->hi_count = 0;
 542 #endif /* MACH_PAGE_HASH_STATS */
 543         }
 544
 545         /*
 546          *      Machine-dependent code allocates the resident page table.
 547          *      It uses vm_page_init to initialize the page frames.
 548          *      The code also returns to us the virtual space available
 549          *      to the kernel.  We don't trust the pmap module
 550          *      to get the alignment right.
 551          */
 552
 553         pmap_startup(&virtual_space_start, &virtual_space_end);
 554         virtual_space_start = round_page(virtual_space_start);
 555         virtual_space_end = trunc_page(virtual_space_end);
 556
 557         *startp = virtual_space_start;
 558         *endp = virtual_space_end;
 559
 560         /*
 561          *      Compute the initial "wire" count.
 562          *      Up until now, the pages which have been set aside are not under
 563          *      the VM system's control, so although they aren't explicitly
 564          *      wired, they nonetheless can't be moved. At this moment,
 565          *      all VM managed pages are "free", courtesy of pmap_startup.
 566          */
 567         vm_page_wire_count = atop_64(max_mem) - vm_page_free_count;     /* initial value */
 568         vm_page_free_count_minimum = vm_page_free_count;
 569
 570         printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
 571                vm_page_free_count, vm_page_wire_count);
 572
 573         simple_lock_init(&vm_paging_lock, 0);
 574 }
 575
 576 #ifndef MACHINE_PAGES
 577 /*
 578  *      We implement pmap_steal_memory and pmap_startup with the help
 579  *      of two simpler functions, pmap_virtual_space and pmap_next_page.
 580  */
 581
 582 void *
 583 pmap_steal_memory(
 584         vm_size_t size)
 585 {
 586         vm_offset_t addr, vaddr;
 587         ppnum_t phys_page;
 588
 589         /*
 590          *      We round the size to a round multiple.
 591          */
 592
 593         size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
 594
 595         /*
 596          *      If this is the first call to pmap_steal_memory,
 597          *      we have to initialize ourself.
 598          */
 599
 600         if (virtual_space_start == virtual_space_end) {
 601                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
 602
 603                 /*
 604                  *      The initial values must be aligned properly, and
 605                  *      we don't trust the pmap module to do it right.
 606                  */
 607
 608                 virtual_space_start = round_page(virtual_space_start);
 609                 virtual_space_end = trunc_page(virtual_space_end);
 610         }
 611
 612         /*
 613          *      Allocate virtual memory for this request.
 614          */
 615
 616         addr = virtual_space_start;
 617         virtual_space_start += size;
 618
 619         kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size);        /* (TEST/DEBUG) */
 620
 621         /*
 622          *      Allocate and map physical pages to back new virtual pages.
 623          */
 624
 625         for (vaddr = round_page(addr);
 626              vaddr < addr + size;
 627              vaddr += PAGE_SIZE) {
 628                 if (!pmap_next_page(&phys_page))
 629                         panic("pmap_steal_memory");
 630
 631                 /*
 632                  *      XXX Logically, these mappings should be wired,
 633                  *      but some pmap modules barf if they are.
 634                  */
 635
 636                 pmap_enter(kernel_pmap, vaddr, phys_page,
 637                            VM_PROT_READ|VM_PROT_WRITE,
 638                                 VM_WIMG_USE_DEFAULT, FALSE);
 639                 /*
 640                  * Account for newly stolen memory
 641                  */
 642                 vm_page_wire_count++;
 643
 644         }
 645
 646         return (void *) addr;
 647 }
 648
 649 void
 650 pmap_startup(
 651         vm_offset_t *startp,
 652         vm_offset_t *endp)
 653 {
 654         unsigned int i, npages, pages_initialized, fill, fillval;
 655         ppnum_t         phys_page;
 656         addr64_t        tmpaddr;
 657         unsigned int    num_of_lopages = 0;
 658         unsigned int    last_index;
 659
 660         /*
 661          *      We calculate how many page frames we will have
 662          *      and then allocate the page structures in one chunk.
 663          */
 664
 665         tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;    /* Get the amount of memory left */
 666         tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start);       /* Account for any slop */
 667         npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));   /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
 668
 669         vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
 670
 671         /*
 672          *      Initialize the page frames.
 673          */
 674         for (i = 0, pages_initialized = 0; i < npages; i++) {
 675                 if (!pmap_next_page(&phys_page))
 676                         break;
 677
 678                 vm_page_init(&vm_pages[i], phys_page);
 679                 vm_page_pages++;
 680                 pages_initialized++;
 681         }
 682         vm_pages_count = pages_initialized;
 683
 684         /*
 685          * Check if we want to initialize pages to a known value
 686          */
 687         fill = 0;                                                               /* Assume no fill */
 688         if (PE_parse_boot_arg("fill", &fillval)) fill = 1;                      /* Set fill */
 689
 690
 691         /*
 692          * if vm_lopage_poolsize is non-zero, than we need to reserve
 693          * a pool of pages whose addresess are less than 4G... this pool
 694          * is used by drivers whose hardware can't DMA beyond 32 bits...
 695          *
 696          * note that I'm assuming that the page list is ascending and
 697          * ordered w/r to the physical address
 698          */
 699         for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
 700                 vm_page_t m;
 701
 702                 m = &vm_pages[i];
 703
 704                 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
 705                         panic("couldn't reserve the lopage pool: not enough lo pages\n");
 706
 707                 if (m->phys_page < vm_lopage_poolend)
 708                         panic("couldn't reserve the lopage pool: page list out of order\n");
 709
 710                 vm_lopage_poolend = m->phys_page;
 711
 712                 if (vm_lopage_poolstart == 0)
 713                         vm_lopage_poolstart = m->phys_page;
 714                 else {
 715                         if (m->phys_page < vm_lopage_poolstart)
 716                                 panic("couldn't reserve the lopage pool: page list out of order\n");
 717                 }
 718
 719                 if (fill)
 720                         fillPage(m->phys_page, fillval);                /* Fill the page with a know value if requested at boot */
 721
 722                 vm_page_release(m);
 723         }
 724         last_index = i;
 725
 726         // -debug code remove
 727         if (2 == vm_himemory_mode) {
 728                 // free low -> high so high is preferred
 729                 for (i = last_index + 1; i <= pages_initialized; i++) {
 730                         if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);          /* Fill the page with a know value if requested at boot */
 731                         vm_page_release(&vm_pages[i - 1]);
 732                 }
 733         }
 734         else
 735         // debug code remove-
 736
 737         /*
 738          * Release pages in reverse order so that physical pages
 739          * initially get allocated in ascending addresses. This keeps
 740          * the devices (which must address physical memory) happy if
 741          * they require several consecutive pages.
 742          */
 743         for (i = pages_initialized; i > last_index; i--) {
 744                 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);          /* Fill the page with a know value if requested at boot */
 745                 vm_page_release(&vm_pages[i - 1]);
 746         }
 747
 748 #if 0
 749         {
 750                 vm_page_t xx, xxo, xxl;
 751                 int i, j, k, l;
 752
 753                 j = 0;                                                                                                  /* (BRINGUP) */
 754                 xxl = 0;
 755
 756                 for( i = 0; i < vm_colors; i++ ) {
 757                         queue_iterate(&vm_page_queue_free[i],
 758                                       xx,
 759                                       vm_page_t,
 760                                       pageq) {  /* BRINGUP */
 761                                 j++;                                                                                            /* (BRINGUP) */
 762                                 if(j > vm_page_free_count) {                                            /* (BRINGUP) */
 763                                         panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
 764                                 }
 765
 766                                 l = vm_page_free_count - j;                                                     /* (BRINGUP) */
 767                                 k = 0;                                                                                          /* (BRINGUP) */
 768
 769                                 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
 770
 771                                 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {       /* (BRINGUP) */
 772                                         k++;
 773                                         if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
 774                                         if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {     /* (BRINGUP) */
 775                                                 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
 776                                         }
 777                                 }
 778
 779                                 xxl = xx;
 780                         }
 781                 }
 782
 783                 if(j != vm_page_free_count) {                                           /* (BRINGUP) */
 784                         panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
 785                 }
 786         }
 787 #endif
 788
 789
 790         /*
 791          *      We have to re-align virtual_space_start,
 792          *      because pmap_steal_memory has been using it.
 793          */
 794
 795         virtual_space_start = round_page_32(virtual_space_start);
 796
 797         *startp = virtual_space_start;
 798         *endp = virtual_space_end;
 799 }
 800 #endif  /* MACHINE_PAGES */
 801
 802 /*
 803  *      Routine:        vm_page_module_init
 804  *      Purpose:
 805  *              Second initialization pass, to be done after
 806  *              the basic VM system is ready.
 807  */
 808 void
 809 vm_page_module_init(void)
 810 {
 811         vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
 812                              0, PAGE_SIZE, "vm pages");
 813
 814 #if     ZONE_DEBUG
 815         zone_debug_disable(vm_page_zone);
 816 #endif  /* ZONE_DEBUG */
 817
 818         zone_change(vm_page_zone, Z_EXPAND, FALSE);
 819         zone_change(vm_page_zone, Z_EXHAUST, TRUE);
 820         zone_change(vm_page_zone, Z_FOREIGN, TRUE);
 821
 822         /*
 823          * Adjust zone statistics to account for the real pages allocated
 824          * in vm_page_create(). [Q: is this really what we want?]
 825          */
 826         vm_page_zone->count += vm_page_pages;
 827         vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
 828
 829         mutex_init(&vm_page_alloc_lock, 0);
 830 }
 831
 832 /*
 833  *      Routine:        vm_page_create
 834  *      Purpose:
 835  *              After the VM system is up, machine-dependent code
 836  *              may stumble across more physical memory.  For example,
 837  *              memory that it was reserving for a frame buffer.
 838  *              vm_page_create turns this memory into available pages.
 839  */
 840
 841 void
 842 vm_page_create(
 843         ppnum_t start,
 844         ppnum_t end)
 845 {
 846         ppnum_t         phys_page;
 847         vm_page_t       m;
 848
 849         for (phys_page = start;
 850              phys_page < end;
 851              phys_page++) {
 852                 while ((m = (vm_page_t) vm_page_grab_fictitious())
 853                         == VM_PAGE_NULL)
 854                         vm_page_more_fictitious();
 855
 856                 vm_page_init(m, phys_page);
 857                 vm_page_pages++;
 858                 vm_page_release(m);
 859         }
 860 }
 861
 862 /*
 863  *      vm_page_hash:
 864  *
 865  *      Distributes the object/offset key pair among hash buckets.
 866  *
 867  *      NOTE:   The bucket count must be a power of 2
 868  */
 869 #define vm_page_hash(object, offset) (\
 870         ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
 871          & vm_page_hash_mask)
 872
 873
 874 /*
 875  *      vm_page_insert:         [ internal use only ]
 876  *
 877  *      Inserts the given mem entry into the object/object-page
 878  *      table and object list.
 879  *
 880  *      The object must be locked.
 881  */
 882 void
 883 vm_page_insert(
 884         vm_page_t               mem,
 885         vm_object_t             object,
 886         vm_object_offset_t      offset)
 887 {
 888         vm_page_insert_internal(mem, object, offset, FALSE);
 889 }
 890
 891
 892 static void
 893 vm_page_insert_internal(
 894         vm_page_t               mem,
 895         vm_object_t             object,
 896         vm_object_offset_t      offset,
 897         boolean_t       queues_lock_held)
 898 {
 899         register vm_page_bucket_t *bucket;
 900
 901         XPR(XPR_VM_PAGE,
 902                 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
 903                 (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
 904
 905         VM_PAGE_CHECK(mem);
 906
 907         if (object == vm_submap_object) {
 908                 /* the vm_submap_object is only a placeholder for submaps */
 909                 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
 910         }
 911
 912         vm_object_lock_assert_exclusive(object);
 913 #if DEBUG
 914         if (mem->tabled || mem->object != VM_OBJECT_NULL)
 915                 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
 916                       "already in (obj=%p,off=0x%llx)",
 917                       mem, object, offset, mem->object, mem->offset);
 918 #endif
 919         assert(!object->internal || offset < object->size);
 920
 921         /* only insert "pageout" pages into "pageout" objects,
 922          * and normal pages into normal objects */
 923         assert(object->pageout == mem->pageout);
 924
 925         assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
 926
 927         /*
 928          *      Record the object/offset pair in this page
 929          */
 930
 931         mem->object = object;
 932         mem->offset = offset;
 933
 934         /*
 935          *      Insert it into the object_object/offset hash table
 936          */
 937
 938         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
 939         simple_lock(&vm_page_bucket_lock);
 940         mem->next = bucket->pages;
 941         bucket->pages = mem;
 942 #if     MACH_PAGE_HASH_STATS
 943         if (++bucket->cur_count > bucket->hi_count)
 944                 bucket->hi_count = bucket->cur_count;
 945 #endif /* MACH_PAGE_HASH_STATS */
 946         simple_unlock(&vm_page_bucket_lock);
 947
 948         /*
 949          *      Now link into the object's list of backed pages.
 950          */
 951
 952         VM_PAGE_INSERT(mem, object);
 953         mem->tabled = TRUE;
 954
 955         /*
 956          *      Show that the object has one more resident page.
 957          */
 958
 959         object->resident_page_count++;
 960
 961         if (object->purgable == VM_PURGABLE_VOLATILE ||
 962             object->purgable == VM_PURGABLE_EMPTY) {
 963                 if (queues_lock_held == FALSE)
 964                         vm_page_lockspin_queues();
 965
 966                 vm_page_purgeable_count++;
 967
 968                 if (queues_lock_held == FALSE)
 969                         vm_page_unlock_queues();
 970         }
 971 }
 972
 973 /*
 974  *      vm_page_replace:
 975  *
 976  *      Exactly like vm_page_insert, except that we first
 977  *      remove any existing page at the given offset in object.
 978  *
 979  *      The object and page queues must be locked.
 980  */
 981
 982 void
 983 vm_page_replace(
 984         register vm_page_t              mem,
 985         register vm_object_t            object,
 986         register vm_object_offset_t     offset)
 987 {
 988         vm_page_bucket_t *bucket;
 989         vm_page_t        found_m = VM_PAGE_NULL;
 990
 991         VM_PAGE_CHECK(mem);
 992         vm_object_lock_assert_exclusive(object);
 993 #if DEBUG
 994         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
 995
 996         if (mem->tabled || mem->object != VM_OBJECT_NULL)
 997                 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
 998                       "already in (obj=%p,off=0x%llx)",
 999                       mem, object, offset, mem->object, mem->offset);
1000 #endif
1001         /*
1002          *      Record the object/offset pair in this page
1003          */
1004
1005         mem->object = object;
1006         mem->offset = offset;
1007
1008         /*
1009          *      Insert it into the object_object/offset hash table,
1010          *      replacing any page that might have been there.
1011          */
1012
1013         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1014         simple_lock(&vm_page_bucket_lock);
1015
1016         if (bucket->pages) {
1017                 vm_page_t *mp = &bucket->pages;
1018                 register vm_page_t m = *mp;
1019
1020                 do {
1021                         if (m->object == object && m->offset == offset) {
1022                                 /*
1023                                  * Remove old page from hash list
1024                                  */
1025                                 *mp = m->next;
1026
1027                                 found_m = m;
1028                                 break;
1029                         }
1030                         mp = &m->next;
1031                 } while ((m = *mp));
1032
1033                 mem->next = bucket->pages;
1034         } else {
1035                 mem->next = VM_PAGE_NULL;
1036         }
1037         /*
1038          * insert new page at head of hash list
1039          */
1040         bucket->pages = mem;
1041
1042         simple_unlock(&vm_page_bucket_lock);
1043
1044         if (found_m) {
1045                 /*
1046                  * there was already a page at the specified
1047                  * offset for this object... remove it from
1048                  * the object and free it back to the free list
1049                  */
1050                 VM_PAGE_REMOVE(found_m);
1051                 found_m->tabled = FALSE;
1052
1053                 found_m->object = VM_OBJECT_NULL;
1054                 found_m->offset = (vm_object_offset_t) -1;
1055                 object->resident_page_count--;
1056
1057                 if (object->purgable == VM_PURGABLE_VOLATILE ||
1058                     object->purgable == VM_PURGABLE_EMPTY) {
1059                         assert(vm_page_purgeable_count > 0);
1060                         vm_page_purgeable_count--;
1061                 }
1062
1063                 /*
1064                  * Return page to the free list.
1065                  * Note the page is not tabled now
1066                  */
1067                 vm_page_free(found_m);
1068         }
1069         /*
1070          *      Now link into the object's list of backed pages.
1071          */
1072
1073         VM_PAGE_INSERT(mem, object);
1074         mem->tabled = TRUE;
1075
1076         /*
1077          *      And show that the object has one more resident
1078          *      page.
1079          */
1080
1081         object->resident_page_count++;
1082
1083         if (object->purgable == VM_PURGABLE_VOLATILE ||
1084             object->purgable == VM_PURGABLE_EMPTY) {
1085                 vm_page_purgeable_count++;
1086         }
1087 }
1088
1089 /*
1090  *      vm_page_remove:         [ internal use only ]
1091  *
1092  *      Removes the given mem entry from the object/offset-page
1093  *      table and the object page list.
1094  *
1095  *      The object and page queues must be locked.
1096  */
1097
1098 void
1099 vm_page_remove(
1100         register vm_page_t      mem)
1101 {
1102         register vm_page_bucket_t       *bucket;
1103         register vm_page_t      this;
1104
1105         XPR(XPR_VM_PAGE,
1106                 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1107                 (integer_t)mem->object, (integer_t)mem->offset,
1108                 (integer_t)mem, 0,0);
1109 #if DEBUG
1110         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1111 #endif
1112         vm_object_lock_assert_exclusive(mem->object);
1113         assert(mem->tabled);
1114         assert(!mem->cleaning);
1115         VM_PAGE_CHECK(mem);
1116
1117
1118         /*
1119          *      Remove from the object_object/offset hash table
1120          */
1121
1122         bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
1123         simple_lock(&vm_page_bucket_lock);
1124         if ((this = bucket->pages) == mem) {
1125                 /* optimize for common case */
1126
1127                 bucket->pages = mem->next;
1128         } else {
1129                 register vm_page_t      *prev;
1130
1131                 for (prev = &this->next;
1132                      (this = *prev) != mem;
1133                      prev = &this->next)
1134                         continue;
1135                 *prev = this->next;
1136         }
1137 #if     MACH_PAGE_HASH_STATS
1138         bucket->cur_count--;
1139 #endif /* MACH_PAGE_HASH_STATS */
1140         simple_unlock(&vm_page_bucket_lock);
1141
1142         /*
1143          *      Now remove from the object's list of backed pages.
1144          */
1145
1146         VM_PAGE_REMOVE(mem);
1147
1148         /*
1149          *      And show that the object has one fewer resident
1150          *      page.
1151          */
1152
1153         mem->object->resident_page_count--;
1154
1155         if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1156             mem->object->purgable == VM_PURGABLE_EMPTY) {
1157                 assert(vm_page_purgeable_count > 0);
1158                 vm_page_purgeable_count--;
1159         }
1160         mem->tabled = FALSE;
1161         mem->object = VM_OBJECT_NULL;
1162         mem->offset = (vm_object_offset_t) -1;
1163 }
1164
1165 /*
1166  *      vm_page_lookup:
1167  *
1168  *      Returns the page associated with the object/offset
1169  *      pair specified; if none is found, VM_PAGE_NULL is returned.
1170  *
1171  *      The object must be locked.  No side effects.
1172  */
1173
1174 unsigned long vm_page_lookup_hint = 0;
1175 unsigned long vm_page_lookup_hint_next = 0;
1176 unsigned long vm_page_lookup_hint_prev = 0;
1177 unsigned long vm_page_lookup_hint_miss = 0;
1178 unsigned long vm_page_lookup_bucket_NULL = 0;
1179 unsigned long vm_page_lookup_miss = 0;
1180
1181
1182 vm_page_t
1183 vm_page_lookup(
1184         register vm_object_t            object,
1185         register vm_object_offset_t     offset)
1186 {
1187         register vm_page_t      mem;
1188         register vm_page_bucket_t *bucket;
1189         queue_entry_t           qe;
1190
1191         vm_object_lock_assert_held(object);
1192         mem = object->memq_hint;
1193
1194         if (mem != VM_PAGE_NULL) {
1195                 assert(mem->object == object);
1196
1197                 if (mem->offset == offset) {
1198                         vm_page_lookup_hint++;
1199                         return mem;
1200                 }
1201                 qe = queue_next(&mem->listq);
1202
1203                 if (! queue_end(&object->memq, qe)) {
1204                         vm_page_t       next_page;
1205
1206                         next_page = (vm_page_t) qe;
1207                         assert(next_page->object == object);
1208
1209                         if (next_page->offset == offset) {
1210                                 vm_page_lookup_hint_next++;
1211                                 object->memq_hint = next_page; /* new hint */
1212                                 return next_page;
1213                         }
1214                 }
1215                 qe = queue_prev(&mem->listq);
1216
1217                 if (! queue_end(&object->memq, qe)) {
1218                         vm_page_t prev_page;
1219
1220                         prev_page = (vm_page_t) qe;
1221                         assert(prev_page->object == object);
1222
1223                         if (prev_page->offset == offset) {
1224                                 vm_page_lookup_hint_prev++;
1225                                 object->memq_hint = prev_page; /* new hint */
1226                                 return prev_page;
1227                         }
1228                 }
1229         }
1230         /*
1231          * Search the hash table for this object/offset pair
1232          */
1233         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1234
1235         /*
1236          * since we hold the object lock, we are guaranteed that no
1237          * new pages can be inserted into this object... this in turn
1238          * guarantess that the page we're looking for can't exist
1239          * if the bucket it hashes to is currently NULL even when looked
1240          * at outside the scope of the hash bucket lock... this is a
1241          * really cheap optimiztion to avoid taking the lock
1242          */
1243         if (bucket->pages == VM_PAGE_NULL) {
1244                 vm_page_lookup_bucket_NULL++;
1245
1246                 return (VM_PAGE_NULL);
1247         }
1248         simple_lock(&vm_page_bucket_lock);
1249
1250         for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1251                 VM_PAGE_CHECK(mem);
1252                 if ((mem->object == object) && (mem->offset == offset))
1253                         break;
1254         }
1255         simple_unlock(&vm_page_bucket_lock);
1256
1257         if (mem != VM_PAGE_NULL) {
1258                 if (object->memq_hint != VM_PAGE_NULL) {
1259                         vm_page_lookup_hint_miss++;
1260                 }
1261                 assert(mem->object == object);
1262                 object->memq_hint = mem;
1263         } else
1264                 vm_page_lookup_miss++;
1265
1266         return(mem);
1267 }
1268
1269
1270 /*
1271  *      vm_page_rename:
1272  *
1273  *      Move the given memory entry from its
1274  *      current object to the specified target object/offset.
1275  *
1276  *      The object must be locked.
1277  */
1278 void
1279 vm_page_rename(
1280         register vm_page_t              mem,
1281         register vm_object_t            new_object,
1282         vm_object_offset_t              new_offset,
1283         boolean_t                       encrypted_ok)
1284 {
1285         assert(mem->object != new_object);
1286
1287         /*
1288          * ENCRYPTED SWAP:
1289          * The encryption key is based on the page's memory object
1290          * (aka "pager") and paging offset.  Moving the page to
1291          * another VM object changes its "pager" and "paging_offset"
1292          * so it has to be decrypted first, or we would lose the key.
1293          *
1294          * One exception is VM object collapsing, where we transfer pages
1295          * from one backing object to its parent object.  This operation also
1296          * transfers the paging information, so the <pager,paging_offset> info
1297          * should remain consistent.  The caller (vm_object_do_collapse())
1298          * sets "encrypted_ok" in this case.
1299          */
1300         if (!encrypted_ok && mem->encrypted) {
1301                 panic("vm_page_rename: page %p is encrypted\n", mem);
1302         }
1303
1304         /*
1305          *      Changes to mem->object require the page lock because
1306          *      the pageout daemon uses that lock to get the object.
1307          */
1308
1309         XPR(XPR_VM_PAGE,
1310                 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1311                 (integer_t)new_object, (integer_t)new_offset,
1312                 (integer_t)mem, 0,0);
1313
1314         vm_page_lockspin_queues();
1315         vm_page_remove(mem);
1316         vm_page_insert(mem, new_object, new_offset);
1317         vm_page_unlock_queues();
1318 }
1319
1320 /*
1321  *      vm_page_init:
1322  *
1323  *      Initialize the fields in a new page.
1324  *      This takes a structure with random values and initializes it
1325  *      so that it can be given to vm_page_release or vm_page_insert.
1326  */
1327 void
1328 vm_page_init(
1329         vm_page_t       mem,
1330         ppnum_t phys_page)
1331 {
1332         assert(phys_page);
1333         *mem = vm_page_template;
1334         mem->phys_page = phys_page;
1335 }
1336
1337 /*
1338  *      vm_page_grab_fictitious:
1339  *
1340  *      Remove a fictitious page from the free list.
1341  *      Returns VM_PAGE_NULL if there are no free pages.
1342  */
1343 int     c_vm_page_grab_fictitious = 0;
1344 int     c_vm_page_release_fictitious = 0;
1345 int     c_vm_page_more_fictitious = 0;
1346
1347 extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
1348
1349 vm_page_t
1350 vm_page_grab_fictitious_common(
1351         vm_offset_t phys_addr)
1352 {
1353         register vm_page_t m;
1354
1355         m = (vm_page_t)zget(vm_page_zone);
1356         if (m) {
1357                 vm_page_init(m, phys_addr);
1358                 m->fictitious = TRUE;
1359         }
1360
1361         c_vm_page_grab_fictitious++;
1362         return m;
1363 }
1364
1365 vm_page_t
1366 vm_page_grab_fictitious(void)
1367 {
1368         return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1369 }
1370
1371 vm_page_t
1372 vm_page_grab_guard(void)
1373 {
1374         return vm_page_grab_fictitious_common(vm_page_guard_addr);
1375 }
1376
1377 /*
1378  *      vm_page_release_fictitious:
1379  *
1380  *      Release a fictitious page to the free list.
1381  */
1382
1383 void
1384 vm_page_release_fictitious(
1385         register vm_page_t m)
1386 {
1387         assert(!m->free);
1388         assert(m->busy);
1389         assert(m->fictitious);
1390         assert(m->phys_page == vm_page_fictitious_addr ||
1391                m->phys_page == vm_page_guard_addr);
1392
1393         c_vm_page_release_fictitious++;
1394 #if DEBUG
1395         if (m->free)
1396                 panic("vm_page_release_fictitious");
1397 #endif
1398         m->free = TRUE;
1399         zfree(vm_page_zone, m);
1400 }
1401
1402 /*
1403  *      vm_page_more_fictitious:
1404  *
1405  *      Add more fictitious pages to the free list.
1406  *      Allowed to block. This routine is way intimate
1407  *      with the zones code, for several reasons:
1408  *      1. we need to carve some page structures out of physical
1409  *         memory before zones work, so they _cannot_ come from
1410  *         the zone_map.
1411  *      2. the zone needs to be collectable in order to prevent
1412  *         growth without bound. These structures are used by
1413  *         the device pager (by the hundreds and thousands), as
1414  *         private pages for pageout, and as blocking pages for
1415  *         pagein. Temporary bursts in demand should not result in
1416  *         permanent allocation of a resource.
1417  *      3. To smooth allocation humps, we allocate single pages
1418  *         with kernel_memory_allocate(), and cram them into the
1419  *         zone. This also allows us to initialize the vm_page_t's
1420  *         on the way into the zone, so that zget() always returns
1421  *         an initialized structure. The zone free element pointer
1422  *         and the free page pointer are both the first item in the
1423  *         vm_page_t.
1424  *      4. By having the pages in the zone pre-initialized, we need
1425  *         not keep 2 levels of lists. The garbage collector simply
1426  *         scans our list, and reduces physical memory usage as it
1427  *         sees fit.
1428  */
1429
1430 void vm_page_more_fictitious(void)
1431 {
1432         register vm_page_t m;
1433         vm_offset_t addr;
1434         kern_return_t retval;
1435         int i;
1436
1437         c_vm_page_more_fictitious++;
1438
1439         /*
1440          * Allocate a single page from the zone_map. Do not wait if no physical
1441          * pages are immediately available, and do not zero the space. We need
1442          * our own blocking lock here to prevent having multiple,
1443          * simultaneous requests from piling up on the zone_map lock. Exactly
1444          * one (of our) threads should be potentially waiting on the map lock.
1445          * If winner is not vm-privileged, then the page allocation will fail,
1446          * and it will temporarily block here in the vm_page_wait().
1447          */
1448         mutex_lock(&vm_page_alloc_lock);
1449         /*
1450          * If another thread allocated space, just bail out now.
1451          */
1452         if (zone_free_count(vm_page_zone) > 5) {
1453                 /*
1454                  * The number "5" is a small number that is larger than the
1455                  * number of fictitious pages that any single caller will
1456                  * attempt to allocate. Otherwise, a thread will attempt to
1457                  * acquire a fictitious page (vm_page_grab_fictitious), fail,
1458                  * release all of the resources and locks already acquired,
1459                  * and then call this routine. This routine finds the pages
1460                  * that the caller released, so fails to allocate new space.
1461                  * The process repeats infinitely. The largest known number
1462                  * of fictitious pages required in this manner is 2. 5 is
1463                  * simply a somewhat larger number.
1464                  */
1465                 mutex_unlock(&vm_page_alloc_lock);
1466                 return;
1467         }
1468
1469         retval = kernel_memory_allocate(zone_map,
1470                                         &addr, PAGE_SIZE, VM_PROT_ALL,
1471                                         KMA_KOBJECT|KMA_NOPAGEWAIT);
1472         if (retval != KERN_SUCCESS) {
1473                 /*
1474                  * No page was available. Tell the pageout daemon, drop the
1475                  * lock to give another thread a chance at it, and
1476                  * wait for the pageout daemon to make progress.
1477                  */
1478                 mutex_unlock(&vm_page_alloc_lock);
1479                 vm_page_wait(THREAD_UNINT);
1480                 return;
1481         }
1482         /*
1483          * Initialize as many vm_page_t's as will fit on this page. This
1484          * depends on the zone code disturbing ONLY the first item of
1485          * each zone element.
1486          */
1487         m = (vm_page_t)addr;
1488         for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1489                 vm_page_init(m, vm_page_fictitious_addr);
1490                 m->fictitious = TRUE;
1491                 m++;
1492         }
1493         zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1494         mutex_unlock(&vm_page_alloc_lock);
1495 }
1496
1497
1498 /*
1499  *      vm_pool_low():
1500  *
1501  *      Return true if it is not likely that a non-vm_privileged thread
1502  *      can get memory without blocking.  Advisory only, since the
1503  *      situation may change under us.
1504  */
1505 int
1506 vm_pool_low(void)
1507 {
1508         /* No locking, at worst we will fib. */
1509         return( vm_page_free_count < vm_page_free_reserved );
1510 }
1511
1512
1513
1514 /*
1515  * this is an interface to support bring-up of drivers
1516  * on platforms with physical memory > 4G...
1517  */
1518 int             vm_himemory_mode = 0;
1519
1520
1521 /*
1522  * this interface exists to support hardware controllers
1523  * incapable of generating DMAs with more than 32 bits
1524  * of address on platforms with physical memory > 4G...
1525  */
1526 unsigned int    vm_lopage_free_count = 0;
1527 unsigned int    vm_lopage_max_count = 0;
1528 queue_head_t    vm_lopage_queue_free;
1529
1530 vm_page_t
1531 vm_page_grablo(void)
1532 {
1533         register vm_page_t      mem;
1534         unsigned int vm_lopage_alloc_count;
1535
1536         if (vm_lopage_poolsize == 0)
1537                 return (vm_page_grab());
1538
1539         mutex_lock(&vm_page_queue_free_lock);
1540
1541         if (! queue_empty(&vm_lopage_queue_free)) {
1542                 queue_remove_first(&vm_lopage_queue_free,
1543                                    mem,
1544                                    vm_page_t,
1545                                    pageq);
1546                 assert(mem->free);
1547                 assert(mem->busy);
1548                 assert(!mem->pmapped);
1549
1550                 mem->pageq.next = NULL;
1551                 mem->pageq.prev = NULL;
1552                 mem->free = FALSE;
1553
1554                 vm_lopage_free_count--;
1555                 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1556                 if (vm_lopage_alloc_count > vm_lopage_max_count)
1557                         vm_lopage_max_count = vm_lopage_alloc_count;
1558         } else {
1559                 mem = VM_PAGE_NULL;
1560         }
1561         mutex_unlock(&vm_page_queue_free_lock);
1562
1563         return (mem);
1564 }
1565
1566
1567 /*
1568  *      vm_page_grab:
1569  *
1570  *      first try to grab a page from the per-cpu free list...
1571  *      this must be done while pre-emption is disabled... if
1572  *      a page is available, we're done...
1573  *      if no page is available, grab the vm_page_queue_free_lock
1574  *      and see if current number of free pages would allow us
1575  *      to grab at least 1... if not, return VM_PAGE_NULL as before...
1576  *      if there are pages available, disable preemption and
1577  *      recheck the state of the per-cpu free list... we could
1578  *      have been preempted and moved to a different cpu, or
1579  *      some other thread could have re-filled it... if still
1580  *      empty, figure out how many pages we can steal from the
1581  *      global free queue and move to the per-cpu queue...
1582  *      return 1 of these pages when done... only wakeup the
1583  *      pageout_scan thread if we moved pages from the global
1584  *      list... no need for the wakeup if we've satisfied the
1585  *      request from the per-cpu queue.
1586  */
1587
1588 #define COLOR_GROUPS_TO_STEAL   4
1589
1590
1591 vm_page_t
1592 vm_page_grab( void )
1593 {
1594         vm_page_t       mem;
1595
1596
1597         disable_preemption();
1598
1599         if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1600 return_page_from_cpu_list:
1601                 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1602                 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1603                 mem->pageq.next = NULL;
1604
1605                 enable_preemption();
1606
1607                 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1608                 assert(mem->tabled == FALSE);
1609                 assert(mem->object == VM_OBJECT_NULL);
1610                 assert(!mem->laundry);
1611                 assert(!mem->free);
1612                 assert(pmap_verify_free(mem->phys_page));
1613                 assert(mem->busy);
1614                 assert(!mem->encrypted);
1615                 assert(!mem->pmapped);
1616
1617                 return mem;
1618         }
1619         enable_preemption();
1620
1621
1622         mutex_lock(&vm_page_queue_free_lock);
1623
1624         /*
1625          *      Optionally produce warnings if the wire or gobble
1626          *      counts exceed some threshold.
1627          */
1628         if (vm_page_wire_count_warning > 0
1629             && vm_page_wire_count >= vm_page_wire_count_warning) {
1630                 printf("mk: vm_page_grab(): high wired page count of %d\n",
1631                         vm_page_wire_count);
1632                 assert(vm_page_wire_count < vm_page_wire_count_warning);
1633         }
1634         if (vm_page_gobble_count_warning > 0
1635             && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1636                 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1637                         vm_page_gobble_count);
1638                 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1639         }
1640
1641         /*
1642          *      Only let privileged threads (involved in pageout)
1643          *      dip into the reserved pool.
1644          */
1645         if ((vm_page_free_count < vm_page_free_reserved) &&
1646             !(current_thread()->options & TH_OPT_VMPRIV)) {
1647                 mutex_unlock(&vm_page_queue_free_lock);
1648                 mem = VM_PAGE_NULL;
1649         }
1650         else {
1651                vm_page_t        head;
1652                vm_page_t        tail;
1653                unsigned int     pages_to_steal;
1654                unsigned int     color;
1655
1656                while ( vm_page_free_count == 0 ) {
1657
1658                         mutex_unlock(&vm_page_queue_free_lock);
1659                         /*
1660                          * must be a privileged thread to be
1661                          * in this state since a non-privileged
1662                          * thread would have bailed if we were
1663                          * under the vm_page_free_reserved mark
1664                          */
1665                         VM_PAGE_WAIT();
1666                         mutex_lock(&vm_page_queue_free_lock);
1667                 }
1668
1669                 disable_preemption();
1670
1671                 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1672                         mutex_unlock(&vm_page_queue_free_lock);
1673
1674                         /*
1675                          * we got preempted and moved to another processor
1676                          * or we got preempted and someone else ran and filled the cache
1677                          */
1678                         goto return_page_from_cpu_list;
1679                 }
1680                 if (vm_page_free_count <= vm_page_free_reserved)
1681                         pages_to_steal = 1;
1682                 else {
1683                         pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1684
1685                         if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1686                                 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1687                 }
1688                 color = PROCESSOR_DATA(current_processor(), start_color);
1689                 head = tail = NULL;
1690
1691                 while (pages_to_steal--) {
1692                         if (--vm_page_free_count < vm_page_free_count_minimum)
1693                                 vm_page_free_count_minimum = vm_page_free_count;
1694
1695                         while (queue_empty(&vm_page_queue_free[color]))
1696                                 color = (color + 1) & vm_color_mask;
1697
1698                         queue_remove_first(&vm_page_queue_free[color],
1699                                            mem,
1700                                            vm_page_t,
1701                                            pageq);
1702                         mem->pageq.next = NULL;
1703                         mem->pageq.prev = NULL;
1704
1705                         color = (color + 1) & vm_color_mask;
1706
1707                         if (head == NULL)
1708                                 head = mem;
1709                         else
1710                                 tail->pageq.next = (queue_t)mem;
1711                         tail = mem;
1712
1713                         mem->pageq.prev = NULL;
1714                         assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1715                         assert(mem->tabled == FALSE);
1716                         assert(mem->object == VM_OBJECT_NULL);
1717                         assert(!mem->laundry);
1718                         assert(mem->free);
1719                         mem->free = FALSE;
1720
1721                         assert(pmap_verify_free(mem->phys_page));
1722                         assert(mem->busy);
1723                         assert(!mem->free);
1724                         assert(!mem->encrypted);
1725                         assert(!mem->pmapped);
1726                 }
1727                 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1728                 PROCESSOR_DATA(current_processor(), start_color) = color;
1729
1730                 /*
1731                  * satisfy this request
1732                  */
1733                 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1734                 mem = head;
1735                 mem->pageq.next = NULL;
1736
1737                 mutex_unlock(&vm_page_queue_free_lock);
1738
1739                 enable_preemption();
1740         }
1741         /*
1742          *      Decide if we should poke the pageout daemon.
1743          *      We do this if the free count is less than the low
1744          *      water mark, or if the free count is less than the high
1745          *      water mark (but above the low water mark) and the inactive
1746          *      count is less than its target.
1747          *
1748          *      We don't have the counts locked ... if they change a little,
1749          *      it doesn't really matter.
1750          */
1751         if ((vm_page_free_count < vm_page_free_min) ||
1752             ((vm_page_free_count < vm_page_free_target) &&
1753              ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1754                 thread_wakeup((event_t) &vm_page_free_wanted);
1755
1756 #if CONFIG_EMBEDDED
1757         {
1758         int     percent_avail;
1759
1760         /*
1761          * Decide if we need to poke the memorystatus notification thread.
1762          */
1763         percent_avail =
1764                 (vm_page_active_count + vm_page_inactive_count +
1765                  vm_page_speculative_count + vm_page_free_count +
1766                  vm_page_purgeable_count ) * 100 /
1767                 atop_64(max_mem);
1768         if (percent_avail <= (kern_memorystatus_level - 5)) {
1769                 kern_memorystatus_level = percent_avail;
1770                 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1771         }
1772         }
1773 #endif
1774
1775 //      dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);      /* (TEST/DEBUG) */
1776
1777         return mem;
1778 }
1779
1780 /*
1781  *      vm_page_release:
1782  *
1783  *      Return a page to the free list.
1784  */
1785
1786 void
1787 vm_page_release(
1788         register vm_page_t      mem)
1789 {
1790         unsigned int    color;
1791 #if 0
1792         unsigned int pindex;
1793         phys_entry *physent;
1794
1795         physent = mapping_phys_lookup(mem->phys_page, &pindex);         /* (BRINGUP) */
1796         if(physent->ppLink & ppN) {                                                                                     /* (BRINGUP) */
1797                 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1798         }
1799         physent->ppLink = physent->ppLink | ppN;                                                        /* (BRINGUP) */
1800 #endif
1801         assert(!mem->private && !mem->fictitious);
1802
1803 //      dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);      /* (TEST/DEBUG) */
1804
1805         mutex_lock(&vm_page_queue_free_lock);
1806 #if DEBUG
1807         if (mem->free)
1808                 panic("vm_page_release");
1809 #endif
1810         mem->free = TRUE;
1811
1812         assert(mem->busy);
1813         assert(!mem->laundry);
1814         assert(mem->object == VM_OBJECT_NULL);
1815         assert(mem->pageq.next == NULL &&
1816                mem->pageq.prev == NULL);
1817         assert(mem->listq.next == NULL &&
1818                mem->listq.prev == NULL);
1819
1820         if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1821                 /*
1822                  * this exists to support hardware controllers
1823                  * incapable of generating DMAs with more than 32 bits
1824                  * of address on platforms with physical memory > 4G...
1825                  */
1826                 queue_enter_first(&vm_lopage_queue_free,
1827                                   mem,
1828                                   vm_page_t,
1829                                   pageq);
1830                 vm_lopage_free_count++;
1831         } else {
1832                 color = mem->phys_page & vm_color_mask;
1833                 queue_enter_first(&vm_page_queue_free[color],
1834                                   mem,
1835                                   vm_page_t,
1836                                   pageq);
1837                 vm_page_free_count++;
1838                 /*
1839                  *      Check if we should wake up someone waiting for page.
1840                  *      But don't bother waking them unless they can allocate.
1841                  *
1842                  *      We wakeup only one thread, to prevent starvation.
1843                  *      Because the scheduling system handles wait queues FIFO,
1844                  *      if we wakeup all waiting threads, one greedy thread
1845                  *      can starve multiple niceguy threads.  When the threads
1846                  *      all wakeup, the greedy threads runs first, grabs the page,
1847                  *      and waits for another page.  It will be the first to run
1848                  *      when the next page is freed.
1849                  *
1850                  *      However, there is a slight danger here.
1851                  *      The thread we wake might not use the free page.
1852                  *      Then the other threads could wait indefinitely
1853                  *      while the page goes unused.  To forestall this,
1854                  *      the pageout daemon will keep making free pages
1855                  *      as long as vm_page_free_wanted is non-zero.
1856                  */
1857
1858                 if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
1859                         vm_page_free_wanted_privileged--;
1860                         thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
1861                 } else if ((vm_page_free_wanted > 0) &&
1862                            (vm_page_free_count >= vm_page_free_reserved)) {
1863                         vm_page_free_wanted--;
1864                         thread_wakeup_one((event_t) &vm_page_free_count);
1865                 }
1866         }
1867         mutex_unlock(&vm_page_queue_free_lock);
1868
1869 #if CONFIG_EMBEDDED
1870         {
1871         int     percent_avail;
1872
1873         /*
1874          * Decide if we need to poke the memorystatus notification thread.
1875          * Locking is not a big issue, as only a single thread delivers these.
1876          */
1877         percent_avail =
1878                 (vm_page_active_count + vm_page_inactive_count +
1879                  vm_page_speculative_count + vm_page_free_count +
1880                  vm_page_purgeable_count ) * 100 /
1881                 atop_64(max_mem);
1882         if (percent_avail >= (kern_memorystatus_level + 5)) {
1883                 kern_memorystatus_level = percent_avail;
1884                 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1885         }
1886         }
1887 #endif
1888 }
1889
1890 /*
1891  *      vm_page_wait:
1892  *
1893  *      Wait for a page to become available.
1894  *      If there are plenty of free pages, then we don't sleep.
1895  *
1896  *      Returns:
1897  *              TRUE:  There may be another page, try again
1898  *              FALSE: We were interrupted out of our wait, don't try again
1899  */
1900
1901 boolean_t
1902 vm_page_wait(
1903         int     interruptible )
1904 {
1905         /*
1906          *      We can't use vm_page_free_reserved to make this
1907          *      determination.  Consider: some thread might
1908          *      need to allocate two pages.  The first allocation
1909          *      succeeds, the second fails.  After the first page is freed,
1910          *      a call to vm_page_wait must really block.
1911          */
1912         kern_return_t   wait_result;
1913         int             need_wakeup = 0;
1914         int             is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1915
1916         mutex_lock(&vm_page_queue_free_lock);
1917
1918         if (is_privileged && vm_page_free_count) {
1919                 mutex_unlock(&vm_page_queue_free_lock);
1920                 return TRUE;
1921         }
1922         if (vm_page_free_count < vm_page_free_target) {
1923
1924                 if (is_privileged) {
1925                         if (vm_page_free_wanted_privileged++ == 0)
1926                                 need_wakeup = 1;
1927                         wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
1928                 } else {
1929                         if (vm_page_free_wanted++ == 0)
1930                                 need_wakeup = 1;
1931                         wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1932                 }
1933                 mutex_unlock(&vm_page_queue_free_lock);
1934                 counter(c_vm_page_wait_block++);
1935
1936                 if (need_wakeup)
1937                         thread_wakeup((event_t)&vm_page_free_wanted);
1938
1939                 if (wait_result == THREAD_WAITING)
1940                         wait_result = thread_block(THREAD_CONTINUE_NULL);
1941
1942                 return(wait_result == THREAD_AWAKENED);
1943         } else {
1944                 mutex_unlock(&vm_page_queue_free_lock);
1945                 return TRUE;
1946         }
1947 }
1948
1949 /*
1950  *      vm_page_alloc:
1951  *
1952  *      Allocate and return a memory cell associated
1953  *      with this VM object/offset pair.
1954  *
1955  *      Object must be locked.
1956  */
1957
1958 vm_page_t
1959 vm_page_alloc(
1960         vm_object_t             object,
1961         vm_object_offset_t      offset)
1962 {
1963         register vm_page_t      mem;
1964
1965         vm_object_lock_assert_exclusive(object);
1966         mem = vm_page_grab();
1967         if (mem == VM_PAGE_NULL)
1968                 return VM_PAGE_NULL;
1969
1970         vm_page_insert(mem, object, offset);
1971
1972         return(mem);
1973 }
1974
1975 vm_page_t
1976 vm_page_alloclo(
1977         vm_object_t             object,
1978         vm_object_offset_t      offset)
1979 {
1980         register vm_page_t      mem;
1981
1982         vm_object_lock_assert_exclusive(object);
1983         mem = vm_page_grablo();
1984         if (mem == VM_PAGE_NULL)
1985                 return VM_PAGE_NULL;
1986
1987         vm_page_insert(mem, object, offset);
1988
1989         return(mem);
1990 }
1991
1992
1993 /*
1994  *      vm_page_alloc_guard:
1995  *
1996  *      Allocate a ficticious page which will be used
1997  *      as a guard page.  The page will be inserted into
1998  *      the object and returned to the caller.
1999  */
2000
2001 vm_page_t
2002 vm_page_alloc_guard(
2003         vm_object_t             object,
2004         vm_object_offset_t      offset)
2005 {
2006         register vm_page_t      mem;
2007
2008         vm_object_lock_assert_exclusive(object);
2009         mem = vm_page_grab_guard();
2010         if (mem == VM_PAGE_NULL)
2011                 return VM_PAGE_NULL;
2012
2013         vm_page_insert(mem, object, offset);
2014
2015         return(mem);
2016 }
2017
2018
2019 counter(unsigned int c_laundry_pages_freed = 0;)
2020
2021 boolean_t       vm_page_free_verify = TRUE;
2022 /*
2023  *      vm_page_free:
2024  *
2025  *      Returns the given page to the free list,
2026  *      disassociating it with any VM object.
2027  *
2028  *      Object and page queues must be locked prior to entry.
2029  */
2030 void
2031 vm_page_free_prepare(
2032         register vm_page_t      mem)
2033 {
2034         VM_PAGE_CHECK(mem);
2035         assert(!mem->free);
2036         assert(!mem->cleaning);
2037         assert(!mem->pageout);
2038
2039 #if DEBUG
2040         if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2041                 assert(pmap_verify_free(mem->phys_page));
2042         }
2043         if (mem->object)
2044                 vm_object_lock_assert_exclusive(mem->object);
2045         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2046
2047         if (mem->free)
2048                panic("vm_page_free: freeing page on free list\n");
2049 #endif
2050
2051         if (mem->laundry) {
2052                 /*
2053                  * We may have to free a page while it's being laundered
2054                  * if we lost its pager (due to a forced unmount, for example).
2055                  * We need to call vm_pageout_throttle_up() before removing
2056                  * the page from its VM object, so that we can find out on
2057                  * which pageout queue the page is.
2058                  */
2059                 vm_pageout_throttle_up(mem);
2060                 counter(++c_laundry_pages_freed);
2061         }
2062
2063         if (mem->tabled)
2064                 vm_page_remove(mem);    /* clears tabled, object, offset */
2065
2066         VM_PAGE_QUEUES_REMOVE(mem);     /* clears active/inactive/throttled/speculative */
2067
2068         if (mem->wire_count) {
2069                 if (!mem->private && !mem->fictitious)
2070                         vm_page_wire_count--;
2071                 mem->wire_count = 0;
2072                 assert(!mem->gobbled);
2073         } else if (mem->gobbled) {
2074                 if (!mem->private && !mem->fictitious)
2075                         vm_page_wire_count--;
2076                 vm_page_gobble_count--;
2077         }
2078         mem->gobbled = FALSE;
2079
2080         PAGE_WAKEUP(mem);       /* clears wanted */
2081
2082         /* Some of these may be unnecessary */
2083         mem->busy = TRUE;
2084         mem->absent = FALSE;
2085         mem->error = FALSE;
2086         mem->dirty = FALSE;
2087         mem->precious = FALSE;
2088         mem->reference = FALSE;
2089         mem->encrypted = FALSE;
2090         mem->encrypted_cleaning = FALSE;
2091         mem->deactivated = FALSE;
2092         mem->pmapped = FALSE;
2093
2094         if (mem->private) {
2095                 mem->private = FALSE;
2096                 mem->fictitious = TRUE;
2097                 mem->phys_page = vm_page_fictitious_addr;
2098         }
2099         if (!mem->fictitious) {
2100                 if (mem->zero_fill == TRUE) {
2101                         mem->zero_fill = FALSE;
2102                         OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2103                 }
2104                 vm_page_init(mem, mem->phys_page);
2105         }
2106 }
2107
2108 void
2109 vm_page_free(
2110         vm_page_t       mem)
2111 {
2112         vm_page_free_prepare(mem);
2113         if (mem->fictitious) {
2114                 vm_page_release_fictitious(mem);
2115         } else {
2116                 vm_page_release(mem);
2117         }
2118 }
2119
2120 /*
2121  * Free a list of pages.  The list can be up to several hundred pages,
2122  * as blocked up by vm_pageout_scan().
2123  * The big win is not having to take the page q and free list locks once
2124  * per page.  We sort the incoming pages into n lists, one for
2125  * each color.
2126  *
2127  * The page queues must be locked, and are kept locked.
2128  */
2129 void
2130 vm_page_free_list(
2131         vm_page_t       mem)
2132 {
2133         vm_page_t       nxt;
2134         int             pg_count = 0;
2135         int             color;
2136         int             inuse_list_head = -1;
2137
2138         queue_head_t    free_list[MAX_COLORS];
2139         int             inuse[MAX_COLORS];
2140
2141         for (color = 0; color < (signed) vm_colors; color++) {
2142                 queue_init(&free_list[color]);
2143         }
2144
2145 #if DEBUG
2146         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2147 #endif
2148         while (mem) {
2149 #if DEBUG
2150                 if (mem->tabled || mem->object)
2151                         panic("vm_page_free_list: freeing tabled page\n");
2152                 if (mem->inactive || mem->active || mem->throttled || mem->free)
2153                         panic("vm_page_free_list: freeing page on list\n");
2154                 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2155                         assert(pmap_verify_free(mem->phys_page));
2156                 }
2157 #endif
2158                 assert(mem->pageq.prev == NULL);
2159                 assert(mem->busy);
2160                 assert(!mem->free);
2161                 nxt = (vm_page_t)(mem->pageq.next);
2162
2163                 if (!mem->fictitious) {
2164                         mem->free = TRUE;
2165
2166                         color = mem->phys_page & vm_color_mask;
2167                         if (queue_empty(&free_list[color])) {
2168                                 inuse[color] = inuse_list_head;
2169                                 inuse_list_head = color;
2170                         }
2171                         queue_enter_first(&free_list[color],
2172                                           mem,
2173                                           vm_page_t,
2174                                           pageq);
2175                         pg_count++;
2176                 } else {
2177                         assert(mem->phys_page == vm_page_fictitious_addr ||
2178                                mem->phys_page == vm_page_guard_addr);
2179                         vm_page_release_fictitious(mem);
2180                 }
2181                 mem = nxt;
2182         }
2183         if (pg_count) {
2184                 unsigned int    avail_free_count;
2185
2186                 mutex_lock(&vm_page_queue_free_lock);
2187
2188                 color = inuse_list_head;
2189
2190                 while( color != -1 ) {
2191                         vm_page_t first, last;
2192                         vm_page_t first_free;
2193
2194                         first = (vm_page_t) queue_first(&free_list[color]);
2195                         last = (vm_page_t) queue_last(&free_list[color]);
2196                         first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2197
2198                         if (queue_empty(&vm_page_queue_free[color])) {
2199                                 queue_last(&vm_page_queue_free[color]) =
2200                                         (queue_entry_t) last;
2201                         } else {
2202                                 queue_prev(&first_free->pageq) =
2203                                         (queue_entry_t) last;
2204                         }
2205                         queue_first(&vm_page_queue_free[color]) =
2206                                 (queue_entry_t) first;
2207                         queue_prev(&first->pageq) =
2208                                 (queue_entry_t) &vm_page_queue_free[color];
2209                         queue_next(&last->pageq) =
2210                                 (queue_entry_t) first_free;
2211                         color = inuse[color];
2212                 }
2213
2214                 vm_page_free_count += pg_count;
2215                 avail_free_count = vm_page_free_count;
2216
2217                 while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
2218                         vm_page_free_wanted_privileged--;
2219                         avail_free_count--;
2220
2221                         thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2222                 }
2223
2224                 if ((vm_page_free_wanted > 0) &&
2225                     (avail_free_count >= vm_page_free_reserved)) {
2226                         unsigned int  available_pages;
2227
2228                         if (avail_free_count >= vm_page_free_reserved) {
2229                                 available_pages = (avail_free_count - vm_page_free_reserved);
2230                         } else {
2231                                 available_pages = 0;
2232                         }
2233
2234                         if (available_pages >= vm_page_free_wanted) {
2235                                 vm_page_free_wanted = 0;
2236                                 thread_wakeup((event_t) &vm_page_free_count);
2237                         } else {
2238                                 while (available_pages--) {
2239                                         vm_page_free_wanted--;
2240                                         thread_wakeup_one((event_t) &vm_page_free_count);
2241                                 }
2242                         }
2243                 }
2244                 mutex_unlock(&vm_page_queue_free_lock);
2245
2246 #if CONFIG_EMBEDDED
2247                 {
2248                 int percent_avail;
2249
2250                 /*
2251                  * Decide if we need to poke the memorystatus notification thread.
2252                  */
2253                 percent_avail =
2254                         (vm_page_active_count + vm_page_inactive_count +
2255                          vm_page_speculative_count + vm_page_free_count +
2256                          vm_page_purgeable_count ) * 100 /
2257                         atop_64(max_mem);
2258                 if (percent_avail >= (kern_memorystatus_level + 5)) {
2259                         kern_memorystatus_level = percent_avail;
2260                         thread_wakeup((event_t)&kern_memorystatus_wakeup);
2261                 }
2262                 }
2263 #endif
2264         }
2265 }
2266
2267
2268 /*
2269  *      vm_page_wire:
2270  *
2271  *      Mark this page as wired down by yet
2272  *      another map, removing it from paging queues
2273  *      as necessary.
2274  *
2275  *      The page's object and the page queues must be locked.
2276  */
2277 void
2278 vm_page_wire(
2279         register vm_page_t      mem)
2280 {
2281
2282 //      dbgLog(current_thread(), mem->offset, mem->object, 1);  /* (TEST/DEBUG) */
2283
2284         VM_PAGE_CHECK(mem);
2285 #if DEBUG
2286         if (mem->object)
2287                 vm_object_lock_assert_exclusive(mem->object);
2288         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2289 #endif
2290         if (mem->wire_count == 0) {
2291                 VM_PAGE_QUEUES_REMOVE(mem);
2292                 if (!mem->private && !mem->fictitious && !mem->gobbled)
2293                         vm_page_wire_count++;
2294                 if (mem->gobbled)
2295                         vm_page_gobble_count--;
2296                 mem->gobbled = FALSE;
2297                 if (mem->zero_fill == TRUE) {
2298                         mem->zero_fill = FALSE;
2299                         OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2300                 }
2301                 /*
2302                  * ENCRYPTED SWAP:
2303                  * The page could be encrypted, but
2304                  * We don't have to decrypt it here
2305                  * because we don't guarantee that the
2306                  * data is actually valid at this point.
2307                  * The page will get decrypted in
2308                  * vm_fault_wire() if needed.
2309                  */
2310         }
2311         assert(!mem->gobbled);
2312         mem->wire_count++;
2313 }
2314
2315 /*
2316  *      vm_page_gobble:
2317  *
2318  *      Mark this page as consumed by the vm/ipc/xmm subsystems.
2319  *
2320  *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2321  */
2322 void
2323 vm_page_gobble(
2324         register vm_page_t      mem)
2325 {
2326         vm_page_lockspin_queues();
2327         VM_PAGE_CHECK(mem);
2328
2329         assert(!mem->gobbled);
2330         assert(mem->wire_count == 0);
2331
2332         if (!mem->gobbled && mem->wire_count == 0) {
2333                 if (!mem->private && !mem->fictitious)
2334                         vm_page_wire_count++;
2335         }
2336         vm_page_gobble_count++;
2337         mem->gobbled = TRUE;
2338         vm_page_unlock_queues();
2339 }
2340
2341 /*
2342  *      vm_page_unwire:
2343  *
2344  *      Release one wiring of this page, potentially
2345  *      enabling it to be paged again.
2346  *
2347  *      The page's object and the page queues must be locked.
2348  */
2349 void
2350 vm_page_unwire(
2351         register vm_page_t      mem)
2352 {
2353
2354 //      dbgLog(current_thread(), mem->offset, mem->object, 0);  /* (TEST/DEBUG) */
2355
2356         VM_PAGE_CHECK(mem);
2357         assert(mem->wire_count > 0);
2358 #if DEBUG
2359         if (mem->object)
2360                 vm_object_lock_assert_exclusive(mem->object);
2361         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2362 #endif
2363         if (--mem->wire_count == 0) {
2364                 assert(!mem->private && !mem->fictitious);
2365                 vm_page_wire_count--;
2366                 assert(!mem->laundry);
2367                 assert(mem->object != kernel_object);
2368                 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2369                 if (!IP_VALID(memory_manager_default) &&
2370                         mem->dirty && mem->object->internal &&
2371                         (mem->object->purgable == VM_PURGABLE_DENY ||
2372                          mem->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2373                         queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
2374                         vm_page_throttled_count++;
2375                         mem->throttled = TRUE;
2376                 } else {
2377                         queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
2378                         vm_page_active_count++;
2379                         mem->active = TRUE;
2380                 }
2381                 mem->reference = TRUE;
2382         }
2383 }
2384
2385
2386 /*
2387  *      vm_page_deactivate:
2388  *
2389  *      Returns the given page to the inactive list,
2390  *      indicating that no physical maps have access
2391  *      to this page.  [Used by the physical mapping system.]
2392  *
2393  *      The page queues must be locked.
2394  */
2395 void
2396 vm_page_deactivate(
2397         register vm_page_t      m)
2398 {
2399         boolean_t rapid_age = FALSE;
2400
2401         VM_PAGE_CHECK(m);
2402         assert(m->object != kernel_object);
2403         assert(m->phys_page != vm_page_guard_addr);
2404
2405 //      dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);        /* (TEST/DEBUG) */
2406 #if DEBUG
2407         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2408 #endif
2409         /*
2410          *      This page is no longer very interesting.  If it was
2411          *      interesting (active or inactive/referenced), then we
2412          *      clear the reference bit and (re)enter it in the
2413          *      inactive queue.  Note wired pages should not have
2414          *      their reference bit cleared.
2415          */
2416         if (m->gobbled) {               /* can this happen? */
2417                 assert(m->wire_count == 0);
2418
2419                 if (!m->private && !m->fictitious)
2420                         vm_page_wire_count--;
2421                 vm_page_gobble_count--;
2422                 m->gobbled = FALSE;
2423         }
2424         if (m->private || (m->wire_count != 0))
2425                 return;
2426
2427         if (m->active && m->deactivated == TRUE) {
2428                 if (!pmap_is_referenced(m->phys_page))
2429                         rapid_age = TRUE;
2430         }
2431         if (rapid_age == FALSE && !m->fictitious && !m->absent)
2432                 pmap_clear_reference(m->phys_page);
2433
2434         m->reference = FALSE;
2435         m->deactivated = FALSE;
2436         m->no_cache = FALSE;
2437
2438         if (!m->inactive) {
2439                 VM_PAGE_QUEUES_REMOVE(m);
2440
2441                 assert(!m->laundry);
2442                 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2443
2444                 if (!IP_VALID(memory_manager_default) &&
2445                         m->dirty && m->object->internal &&
2446                         (m->object->purgable == VM_PURGABLE_DENY ||
2447                          m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2448                         queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2449                         m->throttled = TRUE;
2450                         vm_page_throttled_count++;
2451                 } else {
2452                         if (rapid_age == TRUE ||
2453                             (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
2454                                 vm_page_speculate(m, FALSE);
2455                                 vm_page_speculative_recreated++;
2456                                 return;
2457                         } else {
2458                                 if (m->zero_fill) {
2459                                         queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2460                                         vm_zf_queue_count++;
2461                                 } else {
2462                                         queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2463                                 }
2464                         }
2465                         m->inactive = TRUE;
2466                         if (!m->fictitious) {
2467                                 vm_page_inactive_count++;
2468                                 token_new_pagecount++;
2469                         }
2470                 }
2471         }
2472 }
2473
2474 /*
2475  *      vm_page_activate:
2476  *
2477  *      Put the specified page on the active list (if appropriate).
2478  *
2479  *      The page queues must be locked.
2480  */
2481
2482 void
2483 vm_page_activate(
2484         register vm_page_t      m)
2485 {
2486         VM_PAGE_CHECK(m);
2487 #ifdef  FIXME_4778297
2488         assert(m->object != kernel_object);
2489 #endif
2490         assert(m->phys_page != vm_page_guard_addr);
2491 #if DEBUG
2492         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2493 #endif
2494         if (m->gobbled) {
2495                 assert(m->wire_count == 0);
2496                 if (!m->private && !m->fictitious)
2497                         vm_page_wire_count--;
2498                 vm_page_gobble_count--;
2499                 m->gobbled = FALSE;
2500         }
2501         if (m->private)
2502                 return;
2503
2504 #if DEBUG
2505         if (m->active)
2506                 panic("vm_page_activate: already active");
2507 #endif
2508
2509         if (m->speculative) {
2510                 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2511                 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2512         }
2513
2514         VM_PAGE_QUEUES_REMOVE(m);
2515
2516         if (m->wire_count == 0) {
2517                 assert(!m->laundry);
2518                 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2519                 if (!IP_VALID(memory_manager_default) &&
2520                         !m->fictitious && m->dirty && m->object->internal &&
2521                         (m->object->purgable == VM_PURGABLE_DENY ||
2522                          m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2523                         queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2524                         m->throttled = TRUE;
2525                         vm_page_throttled_count++;
2526                 } else {
2527                         queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2528                         m->active = TRUE;
2529                         if (!m->fictitious)
2530                                 vm_page_active_count++;
2531                 }
2532                 m->reference = TRUE;
2533                 m->no_cache = FALSE;
2534         }
2535 }
2536
2537
2538 /*
2539  *      vm_page_speculate:
2540  *
2541  *      Put the specified page on the speculative list (if appropriate).
2542  *
2543  *      The page queues must be locked.
2544  */
2545 void
2546 vm_page_speculate(
2547         vm_page_t       m,
2548         boolean_t       new)
2549 {
2550         struct vm_speculative_age_q     *aq;
2551
2552         VM_PAGE_CHECK(m);
2553         assert(m->object != kernel_object);
2554         assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
2555         assert(m->phys_page != vm_page_guard_addr);
2556         assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2557 #if DEBUG
2558         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2559 #endif
2560         if (m->wire_count == 0) {
2561                 mach_timespec_t         ts;
2562
2563                 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
2564
2565                 if (vm_page_speculative_count == 0) {
2566
2567                         speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2568                         speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2569
2570                         aq = &vm_page_queue_speculative[speculative_age_index];
2571
2572                         /*
2573                          * set the timer to begin a new group
2574                          */
2575                         aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2576                         aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2577
2578                         ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2579                 } else {
2580                         aq = &vm_page_queue_speculative[speculative_age_index];
2581
2582                         if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2583
2584                                 speculative_age_index++;
2585
2586                                 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2587                                         speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2588                                 if (speculative_age_index == speculative_steal_index) {
2589                                         speculative_steal_index = speculative_age_index + 1;
2590
2591                                         if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2592                                                 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2593                                 }
2594                                 aq = &vm_page_queue_speculative[speculative_age_index];
2595
2596                                 if (!queue_empty(&aq->age_q))
2597                                         vm_page_speculate_ageit(aq);
2598
2599                                 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2600                                 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2601
2602                                 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2603                         }
2604                 }
2605                 enqueue_tail(&aq->age_q, &m->pageq);
2606                 m->speculative = TRUE;
2607                 vm_page_speculative_count++;
2608
2609                 if (new == TRUE) {
2610                         m->object->pages_created++;
2611                         vm_page_speculative_created++;
2612                 }
2613         }
2614 }
2615
2616
2617 /*
2618  * move pages from the specified aging bin to
2619  * the speculative bin that pageout_scan claims from
2620  *
2621  *      The page queues must be locked.
2622  */
2623 void
2624 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2625 {
2626         struct vm_speculative_age_q     *sq;
2627         vm_page_t       t;
2628
2629         sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2630
2631         if (queue_empty(&sq->age_q)) {
2632                 sq->age_q.next = aq->age_q.next;
2633                 sq->age_q.prev = aq->age_q.prev;
2634
2635                 t = (vm_page_t)sq->age_q.next;
2636                 t->pageq.prev = &sq->age_q;
2637
2638                 t = (vm_page_t)sq->age_q.prev;
2639                 t->pageq.next = &sq->age_q;
2640         } else {
2641                 t = (vm_page_t)sq->age_q.prev;
2642                 t->pageq.next = aq->age_q.next;
2643
2644                 t = (vm_page_t)aq->age_q.next;
2645                 t->pageq.prev = sq->age_q.prev;
2646
2647                 t = (vm_page_t)aq->age_q.prev;
2648                 t->pageq.next = &sq->age_q;
2649
2650                 sq->age_q.prev = aq->age_q.prev;
2651         }
2652         queue_init(&aq->age_q);
2653 }
2654
2655
2656 void
2657 vm_page_lru(
2658         vm_page_t       m)
2659 {
2660         VM_PAGE_CHECK(m);
2661         assert(m->object != kernel_object);
2662         assert(m->phys_page != vm_page_guard_addr);
2663
2664 #if DEBUG
2665         _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2666 #endif
2667         if (m->active || m->reference)
2668                 return;
2669
2670         if (m->private || (m->wire_count != 0))
2671                 return;
2672
2673         m->no_cache = FALSE;
2674
2675         VM_PAGE_QUEUES_REMOVE(m);
2676
2677         assert(!m->laundry);
2678         assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2679
2680         queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2681         m->inactive = TRUE;
2682
2683         vm_page_inactive_count++;
2684         token_new_pagecount++;
2685 }
2686
2687
2688 /*
2689  *      vm_page_part_zero_fill:
2690  *
2691  *      Zero-fill a part of the page.
2692  */
2693 void
2694 vm_page_part_zero_fill(
2695         vm_page_t       m,
2696         vm_offset_t     m_pa,
2697         vm_size_t       len)
2698 {
2699         vm_page_t       tmp;
2700
2701         VM_PAGE_CHECK(m);
2702 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2703         pmap_zero_part_page(m->phys_page, m_pa, len);
2704 #else
2705         while (1) {
2706                 tmp = vm_page_grab();
2707                 if (tmp == VM_PAGE_NULL) {
2708                         vm_page_wait(THREAD_UNINT);
2709                         continue;
2710                 }
2711                 break;
2712         }
2713         vm_page_zero_fill(tmp);
2714         if(m_pa != 0) {
2715                 vm_page_part_copy(m, 0, tmp, 0, m_pa);
2716         }
2717         if((m_pa + len) <  PAGE_SIZE) {
2718                 vm_page_part_copy(m, m_pa + len, tmp,
2719                                 m_pa + len, PAGE_SIZE - (m_pa + len));
2720         }
2721         vm_page_copy(tmp,m);
2722         vm_page_lock_queues();
2723         vm_page_free(tmp);
2724         vm_page_unlock_queues();
2725 #endif
2726
2727 }
2728
2729 /*
2730  *      vm_page_zero_fill:
2731  *
2732  *      Zero-fill the specified page.
2733  */
2734 void
2735 vm_page_zero_fill(
2736         vm_page_t       m)
2737 {
2738         XPR(XPR_VM_PAGE,
2739                 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2740                 (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2741
2742         VM_PAGE_CHECK(m);
2743
2744 //      dbgTrace(0xAEAEAEAE, m->phys_page, 0);          /* (BRINGUP) */
2745         pmap_zero_page(m->phys_page);
2746 }
2747
2748 /*
2749  *      vm_page_part_copy:
2750  *
2751  *      copy part of one page to another
2752  */
2753
2754 void
2755 vm_page_part_copy(
2756         vm_page_t       src_m,
2757         vm_offset_t     src_pa,
2758         vm_page_t       dst_m,
2759         vm_offset_t     dst_pa,
2760         vm_size_t       len)
2761 {
2762         VM_PAGE_CHECK(src_m);
2763         VM_PAGE_CHECK(dst_m);
2764
2765         pmap_copy_part_page(src_m->phys_page, src_pa,
2766                         dst_m->phys_page, dst_pa, len);
2767 }
2768
2769 /*
2770  *      vm_page_copy:
2771  *
2772  *      Copy one page to another
2773  *
2774  * ENCRYPTED SWAP:
2775  * The source page should not be encrypted.  The caller should
2776  * make sure the page is decrypted first, if necessary.
2777  */
2778
2779 int vm_page_copy_cs_validations = 0;
2780 int vm_page_copy_cs_tainted = 0;
2781
2782 void
2783 vm_page_copy(
2784         vm_page_t       src_m,
2785         vm_page_t       dest_m)
2786 {
2787         XPR(XPR_VM_PAGE,
2788         "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2789         (integer_t)src_m->object, src_m->offset,
2790         (integer_t)dest_m->object, dest_m->offset,
2791         0);
2792
2793         VM_PAGE_CHECK(src_m);
2794         VM_PAGE_CHECK(dest_m);
2795
2796         /*
2797          * ENCRYPTED SWAP:
2798          * The source page should not be encrypted at this point.
2799          * The destination page will therefore not contain encrypted
2800          * data after the copy.
2801          */
2802         if (src_m->encrypted) {
2803                 panic("vm_page_copy: source page %p is encrypted\n", src_m);
2804         }
2805         dest_m->encrypted = FALSE;
2806
2807         if (src_m->object != VM_OBJECT_NULL &&
2808             src_m->object->code_signed &&
2809             !src_m->cs_validated) {
2810                 /*
2811                  * We're copying a not-yet-validated page from a
2812                  * code-signed object.
2813                  * Whoever ends up mapping the copy page might care about
2814                  * the original page's integrity, so let's validate the
2815                  * source page now.
2816                  */
2817                 vm_page_copy_cs_validations++;
2818                 vm_page_validate_cs(src_m);
2819         }
2820         /*
2821          * Propagate the code-signing bits to the copy page.
2822          */
2823         dest_m->cs_validated = src_m->cs_validated;
2824         dest_m->cs_tainted = src_m->cs_tainted;
2825         if (dest_m->cs_tainted) {
2826                 assert(dest_m->cs_validated);
2827                 vm_page_copy_cs_tainted++;
2828         }
2829
2830         pmap_copy_page(src_m->phys_page, dest_m->phys_page);
2831 }
2832
2833 #if MACH_ASSERT
2834 /*
2835  *      Check that the list of pages is ordered by
2836  *      ascending physical address and has no holes.
2837  */
2838 static int
2839 vm_page_verify_contiguous(
2840         vm_page_t       pages,
2841         unsigned int    npages)
2842 {
2843         register vm_page_t      m;
2844         unsigned int            page_count;
2845         vm_offset_t             prev_addr;
2846
2847         prev_addr = pages->phys_page;
2848         page_count = 1;
2849         for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2850                 if (m->phys_page != prev_addr + 1) {
2851                         printf("m %p prev_addr 0x%x, current addr 0x%x\n",
2852                                m, prev_addr, m->phys_page);
2853                         printf("pages %p page_count %d\n", pages, page_count);
2854                         panic("vm_page_verify_contiguous:  not contiguous!");
2855                 }
2856                 prev_addr = m->phys_page;
2857                 ++page_count;
2858         }
2859         if (page_count != npages) {
2860                 printf("pages %p actual count 0x%x but requested 0x%x\n",
2861                        pages, page_count, npages);
2862                 panic("vm_page_verify_contiguous:  count error");
2863         }
2864         return 1;
2865 }
2866 #endif  /* MACH_ASSERT */
2867
2868
2869 #if MACH_ASSERT
2870 /*
2871  *      Check the free lists for proper length etc.
2872  */
2873 static void
2874 vm_page_verify_free_lists( void )
2875 {
2876         unsigned int    color, npages;
2877         vm_page_t       m;
2878         vm_page_t       prev_m;
2879
2880         npages = 0;
2881
2882         mutex_lock(&vm_page_queue_free_lock);
2883
2884         for( color = 0; color < vm_colors; color++ ) {
2885                 prev_m = (vm_page_t) &vm_page_queue_free[color];
2886                 queue_iterate(&vm_page_queue_free[color],
2887                               m,
2888                               vm_page_t,
2889                               pageq) {
2890                         if ((vm_page_t) m->pageq.prev != prev_m)
2891                                 panic("vm_page_verify_free_lists: corrupted prev ptr");
2892                         if ( ! m->free )
2893                                 panic("vm_page_verify_free_lists: not free");
2894                         if ( ! m->busy )
2895                                 panic("vm_page_verify_free_lists: not busy");
2896                         if ( (m->phys_page & vm_color_mask) != color)
2897                                 panic("vm_page_verify_free_lists: wrong color");
2898                         ++npages;
2899                         prev_m = m;
2900                 }
2901         }
2902         if (npages != vm_page_free_count)
2903                 panic("vm_page_verify_free_lists:  npages %u free_count %d",
2904                       npages, vm_page_free_count);
2905
2906         mutex_unlock(&vm_page_queue_free_lock);
2907 }
2908 #endif  /* MACH_ASSERT */
2909
2910
2911
2912 /*
2913  *      CONTIGUOUS PAGE ALLOCATION
2914  *      Additional levels of effort:
2915  *              + consider pages that are currently 'pmapped'
2916  *                  this could be expensive since we'd have
2917  *                  to ask the pmap layer about there state
2918  *              + consider dirty pages
2919  *                  either clean them or
2920  *                  copy them to other locations...
2921  *
2922  *      Find a region large enough to contain at least n pages
2923  *      of contiguous physical memory.
2924  *
2925  *      This is done by traversing the vm_page_t array in a linear fashion
2926  *      we assume that the vm_page_t array has the avaiable physical pages in an
2927  *      ordered, ascending list... this is currently true of all our implementations
2928  *      and must remain so... there can be 'holes' in the array...  we also can
2929  *      no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
2930  *      which use to happen via 'vm_page_convert'... that function was no longer
2931  *      being called and was removed...
2932  *
2933  *      The basic flow consists of stabilizing some of the interesting state of
2934  *      a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
2935  *      sweep at the beginning of the array looking for pages that meet our criterea
2936  *      for a 'stealable' page... currently we are pretty conservative... if the page
2937  *      meets this criterea and is physically contiguous to the previous page in the 'run'
2938  *      we keep developing it.  If we hit a page that doesn't fit, we reset our state
2939  *      and start to develop a new run... if at this point we've already considered
2940  *      at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
2941  *      and mutex_pause (which will yield the processor), to keep the latency low w/r
2942  *      to other threads trying to acquire free pages (or move pages from q to q),
2943  *      and then continue from the spot we left off... we only make 1 pass through the
2944  *      array.  Once we have a 'run' that is long enough, we'll go into the loop which
2945  *      which steals the pages from the queues they're currently on... pages on the free
2946  *      queue can be stolen directly... pages that are on any of the other queues
2947  *      must be removed from the object they are tabled on... this requires taking the
2948  *      object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2949  *      or if the state of the page behind the vm_object lock is no longer viable, we'll
2950  *      dump the pages we've currently stolen back to the free list, and pick up our
2951  *      scan from the point where we aborted the 'current' run.
2952  *
2953  *
2954  *      Requirements:
2955  *              - neither vm_page_queue nor vm_free_list lock can be held on entry
2956  *
2957  *      Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
2958  *
2959  * Algorithm:
2960  */
2961
2962 #define MAX_CONSIDERED_BEFORE_YIELD     1000
2963
2964
2965 #define RESET_STATE_OF_RUN()    \
2966         MACRO_BEGIN             \
2967         prevcontaddr = -2;      \
2968         free_considered = 0;    \
2969         substitute_needed = 0;  \
2970         npages = 0;             \
2971         MACRO_END
2972
2973
2974 static vm_page_t
2975 vm_page_find_contiguous(
2976         unsigned int    contig_pages,
2977         ppnum_t         max_pnum,
2978         boolean_t       wire)
2979 {
2980         vm_page_t       m = NULL;
2981         ppnum_t         prevcontaddr;
2982         unsigned int    npages, considered;
2983         unsigned int    page_idx, start_idx;
2984         int             free_considered, free_available;
2985         int             substitute_needed;
2986 #if MACH_ASSERT
2987         uint32_t        tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
2988         int             yielded = 0;
2989         int             dumped_run = 0;
2990         int             stolen_pages = 0;
2991 #endif
2992
2993         if (contig_pages == 0)
2994                 return VM_PAGE_NULL;
2995
2996 #if MACH_ASSERT
2997         vm_page_verify_free_lists();
2998
2999         clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3000 #endif
3001         vm_page_lock_queues();
3002         mutex_lock(&vm_page_queue_free_lock);
3003
3004         RESET_STATE_OF_RUN();
3005
3006         considered = 0;
3007         free_available = vm_page_free_count - vm_page_free_reserved;
3008
3009         for (page_idx = 0, start_idx = 0;
3010              npages < contig_pages && page_idx < vm_pages_count;
3011              page_idx++) {
3012 retry:
3013                 m = &vm_pages[page_idx];
3014
3015                 if (max_pnum && m->phys_page > max_pnum) {
3016                         /* no more low pages... */
3017                         break;
3018                 }
3019                 if (m->phys_page <= vm_lopage_poolend &&
3020                     m->phys_page >= vm_lopage_poolstart) {
3021                         /*
3022                          * don't want to take pages from our
3023                          * reserved pool of low memory
3024                          * so don't consider it which
3025                          * means starting a new run
3026                          */
3027                         RESET_STATE_OF_RUN();
3028
3029                 } else if (m->wire_count || m->gobbled ||
3030                            m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3031                            m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3032                            m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
3033                         /*
3034                          * page is in a transient state
3035                          * or a state we don't want to deal
3036                          * with, so don't consider it which
3037                          * means starting a new run
3038                          */
3039                         RESET_STATE_OF_RUN();
3040
3041                 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3042                         /*
3043                          * page needs to be on one of our queues
3044                          * in order for it to be stable behind the
3045                          * locks we hold at this point...
3046                          * if not, don't consider it which
3047                          * means starting a new run
3048                          */
3049                         RESET_STATE_OF_RUN();
3050
3051                 } else if (!m->free && (!m->tabled || m->busy)) {
3052                         /*
3053                          * pages on the free list are always 'busy'
3054                          * so we couldn't test for 'busy' in the check
3055                          * for the transient states... pages that are
3056                          * 'free' are never 'tabled', so we also couldn't
3057                          * test for 'tabled'.  So we check here to make
3058                          * sure that a non-free page is not busy and is
3059                          * tabled on an object...
3060                          * if not, don't consider it which
3061                          * means starting a new run
3062                          */
3063                         RESET_STATE_OF_RUN();
3064
3065                 } else {
3066                         if (m->phys_page != prevcontaddr + 1) {
3067                                 npages = 1;
3068                                 start_idx = page_idx;
3069                         } else {
3070                                 npages++;
3071                         }
3072                         prevcontaddr = m->phys_page;
3073
3074                         if (m->pmapped || m->dirty)
3075                                 substitute_needed++;
3076
3077                         if (m->free) {
3078                                 free_considered++;
3079                         }
3080                         if ((free_considered + substitute_needed) > free_available) {
3081                                 /*
3082                                  * if we let this run continue
3083                                  * we will end up dropping the vm_page_free_count
3084                                  * below the reserve limit... we need to abort
3085                                  * this run, but we can at least re-consider this
3086                                  * page... thus the jump back to 'retry'
3087                                  */
3088                                 RESET_STATE_OF_RUN();
3089
3090                                 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3091                                         considered++;
3092                                         goto retry;
3093                                 }
3094                                 /*
3095                                  * free_available == 0
3096                                  * so can't consider any free pages... if
3097                                  * we went to retry in this case, we'd
3098                                  * get stuck looking at the same page
3099                                  * w/o making any forward progress
3100                                  * we also want to take this path if we've already
3101                                  * reached our limit that controls the lock latency
3102                                  */
3103                         }
3104                 }
3105                 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3106
3107                         mutex_unlock(&vm_page_queue_free_lock);
3108                         vm_page_unlock_queues();
3109
3110                         mutex_pause(0);
3111
3112                         vm_page_lock_queues();
3113                         mutex_lock(&vm_page_queue_free_lock);
3114
3115                         RESET_STATE_OF_RUN();
3116                         /*
3117                          * reset our free page limit since we
3118                          * dropped the lock protecting the vm_page_free_queue
3119                          */
3120                         free_available = vm_page_free_count - vm_page_free_reserved;
3121                         considered = 0;
3122 #if MACH_ASSERT
3123                         yielded++;
3124 #endif
3125                         goto retry;
3126                 }
3127                 considered++;
3128         }
3129         m = VM_PAGE_NULL;
3130
3131         if (npages != contig_pages)
3132                 mutex_unlock(&vm_page_queue_free_lock);
3133         else {
3134                 vm_page_t       m1;
3135                 vm_page_t       m2;
3136                 unsigned int    cur_idx;
3137                 unsigned int    tmp_start_idx;
3138                 vm_object_t     locked_object = VM_OBJECT_NULL;
3139                 boolean_t       abort_run = FALSE;
3140
3141                 tmp_start_idx = start_idx;
3142
3143                 /*
3144                  * first pass through to pull the free pages
3145                  * off of the free queue so that in case we
3146                  * need substitute pages, we won't grab any
3147                  * of the free pages in the run... we'll clear
3148                  * the 'free' bit in the 2nd pass, and even in
3149                  * an abort_run case, we'll collect all of the
3150                  * free pages in this run and return them to the free list
3151                  */
3152                 while (start_idx < page_idx) {
3153
3154                         m1 = &vm_pages[start_idx++];
3155
3156                         if (m1->free) {
3157                                 unsigned int color;
3158
3159                                 color = m1->phys_page & vm_color_mask;
3160                                 queue_remove(&vm_page_queue_free[color],
3161                                              m1,
3162                                              vm_page_t,
3163                                              pageq);
3164
3165                                 vm_page_free_count--;
3166                         }
3167                 }
3168                 /*
3169                  * adjust global freelist counts
3170                  */
3171                 if (vm_page_free_count < vm_page_free_count_minimum)
3172                         vm_page_free_count_minimum = vm_page_free_count;
3173
3174                 /*
3175                  * we can drop the free queue lock at this point since
3176                  * we've pulled any 'free' candidates off of the list
3177                  * we need it dropped so that we can do a vm_page_grab
3178                  * when substituing for pmapped/dirty pages
3179                  */
3180                 mutex_unlock(&vm_page_queue_free_lock);
3181
3182                 start_idx = tmp_start_idx;
3183                 cur_idx = page_idx - 1;
3184
3185                 while (start_idx++ < page_idx) {
3186                         /*
3187                          * must go through the list from back to front
3188                          * so that the page list is created in the
3189                          * correct order - low -> high phys addresses
3190                          */
3191                         m1 = &vm_pages[cur_idx--];
3192
3193                         if (m1->free) {
3194                                 /*
3195                                  * pages have already been removed from
3196                                  * the free list in the 1st pass
3197                                  */
3198                                 assert(m1->free);
3199                                 assert(m1->busy);
3200                                 assert(!m1->wanted);
3201                                 assert(!m1->laundry);
3202                                 m1->free = FALSE;
3203
3204                         } else {
3205                                 vm_object_t object;
3206
3207                                 if (abort_run == TRUE)
3208                                         continue;
3209
3210                                 object = m1->object;
3211
3212                                 if (object != locked_object) {
3213                                         if (locked_object) {
3214                                                 vm_object_unlock(locked_object);
3215                                                 locked_object = VM_OBJECT_NULL;
3216                                         }
3217                                         if (vm_object_lock_try(object))
3218                                                 locked_object = object;
3219                                 }
3220                                 if (locked_object == VM_OBJECT_NULL ||
3221                                     (m1->wire_count || m1->gobbled ||
3222                                      m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3223                                      m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3224                                      m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3225
3226                                         if (locked_object) {
3227                                                 vm_object_unlock(locked_object);
3228                                                 locked_object = VM_OBJECT_NULL;
3229                                         }
3230                                         tmp_start_idx = cur_idx;
3231                                         abort_run = TRUE;
3232                                         continue;
3233                                 }
3234                                 if (m1->pmapped || m1->dirty) {
3235                                         int refmod;
3236                                         vm_object_offset_t offset;
3237
3238                                         m2 = vm_page_grab();
3239
3240                                         if (m2 == VM_PAGE_NULL) {
3241                                                 if (locked_object) {
3242                                                         vm_object_unlock(locked_object);
3243                                                         locked_object = VM_OBJECT_NULL;
3244                                                 }
3245                                                 tmp_start_idx = cur_idx;
3246                                                 abort_run = TRUE;
3247                                                 continue;
3248                                         }
3249                                         if (m1->pmapped)
3250                                                 refmod = pmap_disconnect(m1->phys_page);
3251                                         else
3252                                                 refmod = 0;
3253                                         vm_page_copy(m1, m2);
3254
3255                                         m2->reference = m1->reference;
3256                                         m2->dirty     = m1->dirty;
3257
3258                                         if (refmod & VM_MEM_REFERENCED)
3259                                                 m2->reference = TRUE;
3260                                         if (refmod & VM_MEM_MODIFIED)
3261                                                 m2->dirty = TRUE;
3262                                         offset = m1->offset;
3263
3264                                         /*
3265                                          * completely cleans up the state
3266                                          * of the page so that it is ready
3267                                          * to be put onto the free list, or
3268                                          * for this purpose it looks like it
3269                                          * just came off of the free list
3270                                          */
3271                                         vm_page_free_prepare(m1);
3272
3273                                         /*
3274                                          * make sure we clear the ref/mod state
3275                                          * from the pmap layer... else we risk
3276                                          * inheriting state from the last time
3277                                          * this page was used...
3278                                          */
3279                                         pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3280                                         /*
3281                                          * now put the substitute page on the object
3282                                          */
3283                                         vm_page_insert_internal(m2, locked_object, offset, TRUE);
3284
3285                                         if (m2->reference)
3286                                                 vm_page_activate(m2);
3287                                         else
3288                                                 vm_page_deactivate(m2);
3289
3290                                         PAGE_WAKEUP_DONE(m2);
3291
3292                                 } else {
3293                                         /*
3294                                          * completely cleans up the state
3295                                          * of the page so that it is ready
3296                                          * to be put onto the free list, or
3297                                          * for this purpose it looks like it
3298                                          * just came off of the free list
3299                                          */
3300                                         vm_page_free_prepare(m1);
3301                                 }
3302 #if MACH_ASSERT
3303                                 stolen_pages++;
3304 #endif
3305                         }
3306                         m1->pageq.next = (queue_entry_t) m;
3307                         m1->pageq.prev = NULL;
3308                         m = m1;
3309                 }
3310                 if (locked_object) {
3311                         vm_object_unlock(locked_object);
3312                         locked_object = VM_OBJECT_NULL;
3313                 }
3314
3315                 if (abort_run == TRUE) {
3316                         if (m != VM_PAGE_NULL) {
3317                                 vm_page_free_list(m);
3318                         }
3319 #if MACH_ASSERT
3320                         dumped_run++;
3321 #endif
3322                         /*
3323                          * want the index of the last
3324                          * page in this run that was
3325                          * successfully 'stolen', so back
3326                          * it up 1 for the auto-decrement on use
3327                          * and 1 more to bump back over this page
3328                          */
3329                         page_idx = tmp_start_idx + 2;
3330
3331                         if (page_idx >= vm_pages_count)
3332                                 goto done_scanning;
3333
3334                         mutex_lock(&vm_page_queue_free_lock);
3335
3336                         RESET_STATE_OF_RUN();
3337
3338                         /*
3339                          * reset our free page limit since we
3340                          * dropped the lock protecting the vm_page_free_queue
3341                          */
3342                         free_available = vm_page_free_count - vm_page_free_reserved;
3343
3344                         goto retry;
3345                 }
3346
3347                 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
3348
3349                         if (wire == TRUE)
3350                                 m1->wire_count++;
3351                         else
3352                                 m1->gobbled = TRUE;
3353                 }
3354                 if (wire == FALSE)
3355                         vm_page_gobble_count += npages;
3356
3357                 /*
3358                  * gobbled pages are also counted as wired pages
3359                  */
3360                 vm_page_wire_count += npages;
3361
3362                 assert(vm_page_verify_contiguous(m, npages));
3363         }
3364 done_scanning:
3365         vm_page_unlock_queues();
3366
3367 #if MACH_ASSERT
3368         clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
3369
3370         tv_end_sec -= tv_start_sec;
3371         if (tv_end_usec < tv_start_usec) {
3372                 tv_end_sec--;
3373                 tv_end_usec += 1000000;
3374         }
3375         tv_end_usec -= tv_start_usec;
3376         if (tv_end_usec >= 1000000) {
3377                 tv_end_sec++;
3378                 tv_end_sec -= 1000000;
3379         }
3380         printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
3381                contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
3382
3383         vm_page_verify_free_lists();
3384 #endif
3385         return m;
3386 }
3387
3388 /*
3389  *      Allocate a list of contiguous, wired pages.
3390  */
3391 kern_return_t
3392 cpm_allocate(
3393         vm_size_t       size,
3394         vm_page_t       *list,
3395         ppnum_t         max_pnum,
3396         boolean_t       wire)
3397 {
3398         vm_page_t               pages;
3399         unsigned int            npages;
3400
3401         if (size % page_size != 0)
3402                 return KERN_INVALID_ARGUMENT;
3403
3404         npages = size / page_size;
3405
3406         /*
3407          *      Obtain a pointer to a subset of the free
3408          *      list large enough to satisfy the request;
3409          *      the region will be physically contiguous.
3410          */
3411         pages = vm_page_find_contiguous(npages, max_pnum, wire);
3412
3413         if (pages == VM_PAGE_NULL)
3414                 return KERN_NO_SPACE;
3415         /*
3416          * determine need for wakeups
3417          */
3418         if ((vm_page_free_count < vm_page_free_min) ||
3419             ((vm_page_free_count < vm_page_free_target) &&
3420              ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
3421                 thread_wakeup((event_t) &vm_page_free_wanted);
3422
3423 #if CONFIG_EMBEDDED
3424         {
3425         int                     percent_avail;
3426
3427         /*
3428          * Decide if we need to poke the memorystatus notification thread.
3429          */
3430         percent_avail =
3431                 (vm_page_active_count + vm_page_inactive_count +
3432                  vm_page_speculative_count + vm_page_free_count +
3433                  vm_page_purgeable_count ) * 100 /
3434                 atop_64(max_mem);
3435         if (percent_avail <= (kern_memorystatus_level - 5)) {
3436                 kern_memorystatus_level = percent_avail;
3437                 thread_wakeup((event_t)&kern_memorystatus_wakeup);
3438         }
3439         }
3440 #endif
3441         /*
3442          *      The CPM pages should now be available and
3443          *      ordered by ascending physical address.
3444          */
3445         assert(vm_page_verify_contiguous(pages, npages));
3446
3447         *list = pages;
3448         return KERN_SUCCESS;
3449 }
3450
3451
3452 #include <mach_vm_debug.h>
3453 #if     MACH_VM_DEBUG
3454
3455 #include <mach_debug/hash_info.h>
3456 #include <vm/vm_debug.h>
3457
3458 /*
3459  *      Routine:        vm_page_info
3460  *      Purpose:
3461  *              Return information about the global VP table.
3462  *              Fills the buffer with as much information as possible
3463  *              and returns the desired size of the buffer.
3464  *      Conditions:
3465  *              Nothing locked.  The caller should provide
3466  *              possibly-pageable memory.
3467  */
3468
3469 unsigned int
3470 vm_page_info(
3471         hash_info_bucket_t *info,
3472         unsigned int count)
3473 {
3474         unsigned int i;
3475
3476         if (vm_page_bucket_count < count)
3477                 count = vm_page_bucket_count;
3478
3479         for (i = 0; i < count; i++) {
3480                 vm_page_bucket_t *bucket = &vm_page_buckets[i];
3481                 unsigned int bucket_count = 0;
3482                 vm_page_t m;
3483
3484                 simple_lock(&vm_page_bucket_lock);
3485                 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
3486                         bucket_count++;
3487                 simple_unlock(&vm_page_bucket_lock);
3488
3489                 /* don't touch pageable memory while holding locks */
3490                 info[i].hib_count = bucket_count;
3491         }
3492
3493         return vm_page_bucket_count;
3494 }
3495 #endif  /* MACH_VM_DEBUG */
3496
3497 #include <mach_kdb.h>
3498 #if     MACH_KDB
3499
3500 #include <ddb/db_output.h>
3501 #include <vm/vm_print.h>
3502 #define printf  kdbprintf
3503
3504 /*
3505  *      Routine:        vm_page_print [exported]
3506  */
3507 void
3508 vm_page_print(
3509         db_addr_t       db_addr)
3510 {
3511         vm_page_t       p;
3512
3513         p = (vm_page_t) (long) db_addr;
3514
3515         iprintf("page 0x%x\n", p);
3516
3517         db_indent += 2;
3518
3519         iprintf("object=0x%x", p->object);
3520         printf(", offset=0x%x", p->offset);
3521         printf(", wire_count=%d", p->wire_count);
3522
3523         iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
3524                 (p->inactive ? "" : "!"),
3525                 (p->active ? "" : "!"),
3526                 (p->throttled ? "" : "!"),
3527                 (p->gobbled ? "" : "!"),
3528                 (p->laundry ? "" : "!"),
3529                 (p->free ? "" : "!"),
3530                 (p->reference ? "" : "!"),
3531                 (p->encrypted ? "" : "!"));
3532         iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
3533                 (p->busy ? "" : "!"),
3534                 (p->wanted ? "" : "!"),
3535                 (p->tabled ? "" : "!"),
3536                 (p->fictitious ? "" : "!"),
3537                 (p->private ? "" : "!"),
3538                 (p->precious ? "" : "!"));
3539         iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
3540                 (p->absent ? "" : "!"),
3541                 (p->error ? "" : "!"),
3542                 (p->dirty ? "" : "!"),
3543                 (p->cleaning ? "" : "!"),
3544                 (p->pageout ? "" : "!"),
3545                 (p->clustered ? "" : "!"));
3546         iprintf("%soverwriting, %srestart, %sunusual\n",
3547                 (p->overwriting ? "" : "!"),
3548                 (p->restart ? "" : "!"),
3549                 (p->unusual ? "" : "!"));
3550
3551         iprintf("phys_page=0x%x", p->phys_page);
3552
3553         db_indent -= 2;
3554 }
3555 #endif  /* MACH_KDB */