apple/xnu (xnu-1228) - osfmk/vm/vm_resident.c
1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/zalloc.h>
77#include <kern/xpr.h>
78#include <vm/pmap.h>
79#include <vm/vm_init.h>
80#include <vm/vm_map.h>
81#include <vm/vm_page.h>
82#include <vm/vm_pageout.h>
83#include <vm/vm_kern.h> /* kernel_memory_allocate() */
84#include <kern/misc_protos.h>
85#include <zone_debug.h>
86#include <vm/cpm.h>
87#include <ppc/mappings.h> /* (BRINGUP) */
88#include <pexpert/pexpert.h> /* (BRINGUP) */
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#if CONFIG_EMBEDDED
95#include <sys/kern_memorystatus.h>
96#endif
97
98int speculative_age_index = 0;
99int speculative_steal_index = 0;
100
101struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
102
103static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
104
105
106/*
107 * Associated with each page of user-allocatable memory is a
108 * page structure.
109 */
110
111/*
112 * These variables record the values returned by vm_page_bootstrap,
113 * for debugging purposes. The implementation of pmap_steal_memory
114 * and pmap_startup here also uses them internally.
115 */
116
117vm_offset_t virtual_space_start;
118vm_offset_t virtual_space_end;
119int vm_page_pages;
120
121/*
122 * The vm_page_lookup() routine, which provides for fast
123 * (virtual memory object, offset) to page lookup, employs
124 * the following hash table. The vm_page_{insert,remove}
125 * routines install and remove associations in the table.
126 * [This table is often called the virtual-to-physical,
127 * or VP, table.]
128 */
129typedef struct {
130 vm_page_t pages;
131#if MACH_PAGE_HASH_STATS
132 int cur_count; /* current count */
133 int hi_count; /* high water mark */
134#endif /* MACH_PAGE_HASH_STATS */
135} vm_page_bucket_t;
136
137vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
138unsigned int vm_page_bucket_count = 0; /* How big is array? */
139unsigned int vm_page_hash_mask; /* Mask for hash function */
140unsigned int vm_page_hash_shift; /* Shift for hash function */
141uint32_t vm_page_bucket_hash; /* Basic bucket hash */
142decl_simple_lock_data(,vm_page_bucket_lock)
143
144
145#if MACH_PAGE_HASH_STATS
146/* This routine is only for debug. It is intended to be called by
147 * hand by a developer using a kernel debugger. This routine prints
148 * out vm_page_hash table statistics to the kernel debug console.
149 */
150void
151hash_debug(void)
152{
153 int i;
154 int numbuckets = 0;
155 int highsum = 0;
156 int maxdepth = 0;
157
158 for (i = 0; i < vm_page_bucket_count; i++) {
159 if (vm_page_buckets[i].hi_count) {
160 numbuckets++;
161 highsum += vm_page_buckets[i].hi_count;
162 if (vm_page_buckets[i].hi_count > maxdepth)
163 maxdepth = vm_page_buckets[i].hi_count;
164 }
165 }
166 printf("Total number of buckets: %d\n", vm_page_bucket_count);
167 printf("Number used buckets: %d = %d%%\n",
168 numbuckets, 100*numbuckets/vm_page_bucket_count);
169 printf("Number unused buckets: %d = %d%%\n",
170 vm_page_bucket_count - numbuckets,
171 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
172 printf("Sum of bucket max depth: %d\n", highsum);
173 printf("Average bucket depth: %d.%2d\n",
174 highsum/vm_page_bucket_count,
175 highsum%vm_page_bucket_count);
176 printf("Maximum bucket depth: %d\n", maxdepth);
177}
178#endif /* MACH_PAGE_HASH_STATS */
179
180/*
181 * The virtual page size is currently implemented as a runtime
182 * variable, but is constant once initialized using vm_set_page_size.
183 * This initialization must be done in the machine-dependent
184 * bootstrap sequence, before calling other machine-independent
185 * initializations.
186 *
187 * All references to the virtual page size outside this
188 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
189 * constants.
190 */
191vm_size_t page_size = PAGE_SIZE;
192vm_size_t page_mask = PAGE_MASK;
193int page_shift = PAGE_SHIFT;
194
195/*
196 * Resident page structures are initialized from
197 * a template (see vm_page_alloc).
198 *
199 * When adding a new field to the virtual memory
200 * object structure, be sure to add initialization
201 * (see vm_page_bootstrap).
202 */
203struct vm_page vm_page_template;
204
205vm_page_t vm_pages = VM_PAGE_NULL;
206unsigned int vm_pages_count = 0;
207
208/*
209 * Resident pages that represent real memory
210 * are allocated from a set of free lists,
211 * one per color.
212 */
213unsigned int vm_colors;
214unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
215unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
216queue_head_t vm_page_queue_free[MAX_COLORS];
217vm_page_t vm_page_queue_fictitious;
218unsigned int vm_page_free_wanted;
219unsigned int vm_page_free_wanted_privileged;
220unsigned int vm_page_free_count;
221unsigned int vm_page_fictitious_count;
222
223unsigned int vm_page_free_count_minimum; /* debugging */
224
225/*
226 * Occasionally, the virtual memory system uses
227 * resident page structures that do not refer to
228 * real pages, for example to leave a page with
229 * important state information in the VP table.
230 *
231 * These page structures are allocated the way
232 * most other kernel structures are.
233 */
234zone_t vm_page_zone;
235decl_mutex_data(,vm_page_alloc_lock)
236unsigned int io_throttle_zero_fill;
237
238/*
239 * Fictitious pages don't have a physical address,
240 * but we must initialize phys_page to something.
241 * For debugging, this should be a strange value
242 * that the pmap module can recognize in assertions.
243 */
244vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
245
246/*
247 * Guard pages are not accessible so they don't
248 * need a physical address, but we need to enter
249 * one in the pmap.
250 * Let's make it recognizable and make sure that
251 * we don't use a real physical page with that
252 * physical address.
253 */
254vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
255
256/*
257 * Resident page structures are also chained on
258 * queues that are used by the page replacement
259 * system (pageout daemon). These queues are
260 * defined here, but are shared by the pageout
261 * module. The inactive queue is broken into
262 * inactive and zf for convenience as the
263 * pageout daemon often assigns a higher
264 * affinity to zf pages.
265 */
266queue_head_t vm_page_queue_active;
267queue_head_t vm_page_queue_inactive;
268queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
269
270unsigned int vm_page_active_count;
271unsigned int vm_page_inactive_count;
272unsigned int vm_page_throttled_count;
273unsigned int vm_page_speculative_count;
274unsigned int vm_page_wire_count;
275unsigned int vm_page_gobble_count = 0;
276unsigned int vm_page_wire_count_warning = 0;
277unsigned int vm_page_gobble_count_warning = 0;
278
279unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
280uint64_t vm_page_purged_count = 0; /* total count of purged pages */
281
282unsigned int vm_page_speculative_recreated = 0;
283unsigned int vm_page_speculative_created = 0;
284unsigned int vm_page_speculative_used = 0;
285
286ppnum_t vm_lopage_poolstart = 0;
287ppnum_t vm_lopage_poolend = 0;
288int vm_lopage_poolsize = 0;
289uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
290
291
292/*
293 * Several page replacement parameters are also
294 * shared with this module, so that page allocation
295 * (done here in vm_page_alloc) can trigger the
296 * pageout daemon.
297 */
298unsigned int vm_page_free_target = 0;
299unsigned int vm_page_free_min = 0;
300unsigned int vm_page_inactive_target = 0;
301unsigned int vm_page_inactive_min = 0;
302unsigned int vm_page_free_reserved = 0;
303unsigned int vm_page_zfill_throttle_count = 0;
304
305/*
306 * The VM system has a couple of heuristics for deciding
307 * that pages are "uninteresting" and should be placed
308 * on the inactive queue as likely candidates for replacement.
309 * These variables let the heuristics be controlled at run-time
310 * to make experimentation easier.
311 */
312
313boolean_t vm_page_deactivate_hint = TRUE;
314
315/*
316 * vm_set_page_size:
317 *
318 * Sets the page size, perhaps based upon the memory
319 * size. Must be called before any use of page-size
320 * dependent functions.
321 *
322 * Sets page_shift and page_mask from page_size.
323 */
324void
325vm_set_page_size(void)
326{
327 page_mask = page_size - 1;
328
329 if ((page_mask & page_size) != 0)
330 panic("vm_set_page_size: page size not a power of two");
331
332 for (page_shift = 0; ; page_shift++)
333 if ((1U << page_shift) == page_size)
334 break;
335}
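/*
 * Worked example (illustrative only): with page_size == 4096 the loop
 * above yields page_mask == 0xfff and page_shift == 12, since
 * (1U << 12) == 4096.
 */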
336
337
338/* Called once during startup, once the cache geometry is known.
339 */
340static void
341vm_page_set_colors( void )
342{
343 unsigned int n, override;
344
345 if ( PE_parse_boot_arg("colors", &override) ) /* colors specified as a boot-arg? */
346 n = override;
347 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
348 n = vm_cache_geometry_colors;
349 else n = DEFAULT_COLORS; /* use default if all else fails */
350
351 if ( n == 0 )
352 n = 1;
353 if ( n > MAX_COLORS )
354 n = MAX_COLORS;
355
356 /* the count must be a power of 2 */
357 if ( ( n & (n - 1)) !=0 )
358 panic("vm_page_set_colors");
359
360 vm_colors = n;
361 vm_color_mask = n - 1;
362}
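/*
 * Illustrative sketch (disabled; not part of the original source): shows how
 * the color mask computed above selects a free queue, mirroring the
 * "phys_page & vm_color_mask" indexing used later by vm_page_release() and
 * vm_page_grab().  The page number 0x12345 is a made-up example value.
 */
#if 0
static unsigned int
vm_page_color_example(void)
{
	ppnum_t example_phys_page = 0x12345;	/* hypothetical physical page number */

	/* with vm_colors == 32, vm_color_mask == 0x1f, so this returns 0x5 */
	return (example_phys_page & vm_color_mask);
}
#endif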
363
364
365/*
366 * vm_page_bootstrap:
367 *
368 * Initializes the resident memory module.
369 *
370 * Allocates memory for the page cells, and
371 * for the object/offset-to-page hash table headers.
372 * Each page cell is initialized and placed on the free list.
373 * Returns the range of available kernel virtual memory.
374 */
375
376void
377vm_page_bootstrap(
378 vm_offset_t *startp,
379 vm_offset_t *endp)
380{
381 register vm_page_t m;
382 unsigned int i;
383 unsigned int log1;
384 unsigned int log2;
385 unsigned int size;
386
387 /*
388 * Initialize the vm_page template.
389 */
390
391 m = &vm_page_template;
392 m->object = VM_OBJECT_NULL; /* reset later */
393 m->offset = (vm_object_offset_t) -1; /* reset later */
394 m->wire_count = 0;
395
396 m->pageq.next = NULL;
397 m->pageq.prev = NULL;
398 m->listq.next = NULL;
399 m->listq.prev = NULL;
400
401 m->speculative = FALSE;
402 m->throttled = FALSE;
403 m->inactive = FALSE;
404 m->active = FALSE;
405 m->no_cache = FALSE;
406 m->laundry = FALSE;
407 m->free = FALSE;
408 m->pmapped = FALSE;
409 m->reference = FALSE;
410 m->pageout = FALSE;
411 m->dump_cleaning = FALSE;
412 m->list_req_pending = FALSE;
413
414 m->busy = TRUE;
415 m->wanted = FALSE;
416 m->tabled = FALSE;
417 m->fictitious = FALSE;
418 m->private = FALSE;
419 m->absent = FALSE;
420 m->error = FALSE;
421 m->dirty = FALSE;
422 m->cleaning = FALSE;
423 m->precious = FALSE;
424 m->clustered = FALSE;
425 m->unusual = FALSE;
426 m->restart = FALSE;
427 m->zero_fill = FALSE;
428 m->encrypted = FALSE;
429 m->encrypted_cleaning = FALSE;
430 m->deactivated = FALSE;
431
432 m->phys_page = 0; /* reset later */
433
434 /*
435 * Initialize the page queues.
436 */
437
438 mutex_init(&vm_page_queue_free_lock, 0);
439 mutex_init(&vm_page_queue_lock, 0);
440
441 mutex_init(&vm_purgeable_queue_lock, 0);
442
443 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
444 int group;
445
446 purgeable_queues[i].token_q_head = 0;
447 purgeable_queues[i].token_q_tail = 0;
448 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
449 queue_init(&purgeable_queues[i].objq[group]);
450
451 purgeable_queues[i].type = i;
452 purgeable_queues[i].new_pages = 0;
453#if MACH_ASSERT
454 purgeable_queues[i].debug_count_tokens = 0;
455 purgeable_queues[i].debug_count_objects = 0;
456#endif
457 };
458
459 for (i = 0; i < MAX_COLORS; i++ )
460 queue_init(&vm_page_queue_free[i]);
461 queue_init(&vm_lopage_queue_free);
462 vm_page_queue_fictitious = VM_PAGE_NULL;
463 queue_init(&vm_page_queue_active);
464 queue_init(&vm_page_queue_inactive);
465 queue_init(&vm_page_queue_throttled);
466 queue_init(&vm_page_queue_zf);
467
468 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
469 queue_init(&vm_page_queue_speculative[i].age_q);
470
471 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
472 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
473 }
474 vm_page_free_wanted = 0;
475 vm_page_free_wanted_privileged = 0;
476
477 vm_page_set_colors();
478
479
480 /*
481 * Steal memory for the map and zone subsystems.
482 */
483
484 vm_map_steal_memory();
485 zone_steal_memory();
486
487 /*
488 * Allocate (and initialize) the virtual-to-physical
489 * table hash buckets.
490 *
491 * The number of buckets should be a power of two to
492 * get a good hash function. The following computation
493 * chooses the first power of two that is greater than
494 * or equal to the number of physical pages in the system.
495 */
496
497 simple_lock_init(&vm_page_bucket_lock, 0);
498
499 if (vm_page_bucket_count == 0) {
500 unsigned int npages = pmap_free_pages();
501
502 vm_page_bucket_count = 1;
503 while (vm_page_bucket_count < npages)
504 vm_page_bucket_count <<= 1;
505 }
506
507 vm_page_hash_mask = vm_page_bucket_count - 1;
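	/*
	 * Illustrative example: on a hypothetical machine reporting roughly
	 * 1,000,000 free physical pages, the loop above stops at the first
	 * power of two not below that count, so vm_page_bucket_count becomes
	 * (1 << 20) == 1048576 and vm_page_hash_mask becomes 0xfffff.
	 */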
508
509 /*
510 * Calculate object shift value for hashing algorithm:
511 * O = log2(sizeof(struct vm_object))
512 * B = log2(vm_page_bucket_count)
513 * hash shifts the object left by
514 * B/2 - O
515 */
516 size = vm_page_bucket_count;
517 for (log1 = 0; size > 1; log1++)
518 size /= 2;
519 size = sizeof(struct vm_object);
520 for (log2 = 0; size > 1; log2++)
521 size /= 2;
522 vm_page_hash_shift = log1/2 - log2 + 1;
523
524 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
525 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
526 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
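	/*
	 * Continuing the hypothetical (1 << 20)-bucket example: log1 == 20, so
	 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421, roughly the
	 * square root and fourth root of the table size with the low bit forced
	 * on, and vm_page_hash_shift == 10 - log2 + 1, where log2 was computed
	 * above from sizeof(struct vm_object).
	 */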
527
528 if (vm_page_hash_mask & vm_page_bucket_count)
529 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
530
531 vm_page_buckets = (vm_page_bucket_t *)
532 pmap_steal_memory(vm_page_bucket_count *
533 sizeof(vm_page_bucket_t));
534
535 for (i = 0; i < vm_page_bucket_count; i++) {
536 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
537
538 bucket->pages = VM_PAGE_NULL;
539#if MACH_PAGE_HASH_STATS
540 bucket->cur_count = 0;
541 bucket->hi_count = 0;
542#endif /* MACH_PAGE_HASH_STATS */
543 }
544
545 /*
546 * Machine-dependent code allocates the resident page table.
547 * It uses vm_page_init to initialize the page frames.
548 * The code also returns to us the virtual space available
549 * to the kernel. We don't trust the pmap module
550 * to get the alignment right.
551 */
552
553 pmap_startup(&virtual_space_start, &virtual_space_end);
554 virtual_space_start = round_page(virtual_space_start);
555 virtual_space_end = trunc_page(virtual_space_end);
556
557 *startp = virtual_space_start;
558 *endp = virtual_space_end;
559
560 /*
561 * Compute the initial "wire" count.
562 * Up until now, the pages which have been set aside are not under
563 * the VM system's control, so although they aren't explicitly
564 * wired, they nonetheless can't be moved. At this moment,
565 * all VM managed pages are "free", courtesy of pmap_startup.
566 */
567 vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
568 vm_page_free_count_minimum = vm_page_free_count;
569
570 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
571 vm_page_free_count, vm_page_wire_count);
572
573 simple_lock_init(&vm_paging_lock, 0);
574}
575
576#ifndef MACHINE_PAGES
577/*
578 * We implement pmap_steal_memory and pmap_startup with the help
579 * of two simpler functions, pmap_virtual_space and pmap_next_page.
580 */
581
582void *
583pmap_steal_memory(
584 vm_size_t size)
585{
586 vm_offset_t addr, vaddr;
587 ppnum_t phys_page;
588
589 /*
590 * We round the size up to a multiple of the pointer size.
591 */
592
593 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
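	/*
	 * Illustrative example: with 8-byte pointers a request for 13 bytes
	 * becomes (13 + 7) & ~7 == 16, i.e. the size is rounded up to the next
	 * multiple of sizeof(void *).
	 */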
594
595 /*
596 * If this is the first call to pmap_steal_memory,
597 * we have to initialize ourselves.
598 */
599
600 if (virtual_space_start == virtual_space_end) {
601 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
602
603 /*
604 * The initial values must be aligned properly, and
605 * we don't trust the pmap module to do it right.
606 */
607
608 virtual_space_start = round_page(virtual_space_start);
609 virtual_space_end = trunc_page(virtual_space_end);
610 }
611
612 /*
613 * Allocate virtual memory for this request.
614 */
615
616 addr = virtual_space_start;
617 virtual_space_start += size;
618
619 kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */
620
621 /*
622 * Allocate and map physical pages to back new virtual pages.
623 */
624
625 for (vaddr = round_page(addr);
626 vaddr < addr + size;
627 vaddr += PAGE_SIZE) {
628 if (!pmap_next_page(&phys_page))
629 panic("pmap_steal_memory");
630
631 /*
632 * XXX Logically, these mappings should be wired,
633 * but some pmap modules barf if they are.
634 */
635
636 pmap_enter(kernel_pmap, vaddr, phys_page,
637 VM_PROT_READ|VM_PROT_WRITE,
638 VM_WIMG_USE_DEFAULT, FALSE);
639 /*
640 * Account for newly stolen memory
641 */
642 vm_page_wire_count++;
643
644 }
645
646 return (void *) addr;
647}
648
649void
650pmap_startup(
651 vm_offset_t *startp,
652 vm_offset_t *endp)
653{
654 unsigned int i, npages, pages_initialized, fill, fillval;
655 ppnum_t phys_page;
656 addr64_t tmpaddr;
657 unsigned int num_of_lopages = 0;
658 unsigned int last_index;
659
660 /*
661 * We calculate how many page frames we will have
662 * and then allocate the page structures in one chunk.
663 */
664
665 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
666 tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
667 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages we can manage, charging each one for its vm_page_t as well */
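	/*
	 * Illustrative arithmetic: each managed page costs PAGE_SIZE bytes of
	 * page frame plus sizeof(struct vm_page) bytes of bookkeeping, so the
	 * division above leaves exactly enough slack to steal the vm_page
	 * array itself below.  For example, with 4 KB pages and a hypothetical
	 * 100-byte struct vm_page, about 97.6% of the remaining memory ends up
	 * as usable page frames.
	 */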
668
669 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
670
671 /*
672 * Initialize the page frames.
673 */
674 for (i = 0, pages_initialized = 0; i < npages; i++) {
675 if (!pmap_next_page(&phys_page))
676 break;
677
678 vm_page_init(&vm_pages[i], phys_page);
679 vm_page_pages++;
680 pages_initialized++;
681 }
682 vm_pages_count = pages_initialized;
683
684 /*
685 * Check if we want to initialize pages to a known value
686 */
687 fill = 0; /* Assume no fill */
688 if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
689
690
691 /*
692 * if vm_lopage_poolsize is non-zero, then we need to reserve
693 * a pool of pages whose addresses are less than 4G... this pool
694 * is used by drivers whose hardware can't DMA beyond 32 bits...
695 *
696 * note that I'm assuming that the page list is ascending and
697 * ordered with respect to the physical address
698 */
699 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
700 vm_page_t m;
701
702 m = &vm_pages[i];
703
704 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
705 panic("couldn't reserve the lopage pool: not enough lo pages\n");
706
707 if (m->phys_page < vm_lopage_poolend)
708 panic("couldn't reserve the lopage pool: page list out of order\n");
709
710 vm_lopage_poolend = m->phys_page;
711
712 if (vm_lopage_poolstart == 0)
713 vm_lopage_poolstart = m->phys_page;
714 else {
715 if (m->phys_page < vm_lopage_poolstart)
716 panic("couldn't reserve the lopage pool: page list out of order\n");
717 }
718
719 if (fill)
720 fillPage(m->phys_page, fillval); /* Fill the page with a known value if requested at boot */
721
722 vm_page_release(m);
723 }
724 last_index = i;
725
726 // -debug code remove
727 if (2 == vm_himemory_mode) {
728 // free low -> high so high is preferred
729 for (i = last_index + 1; i <= pages_initialized; i++) {
730 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
731 vm_page_release(&vm_pages[i - 1]);
732 }
733 }
734 else
735 // debug code remove-
736
737 /*
738 * Release pages in reverse order so that physical pages
739 * initially get allocated in ascending addresses. This keeps
740 * the devices (which must address physical memory) happy if
741 * they require several consecutive pages.
742 */
743 for (i = pages_initialized; i > last_index; i--) {
744 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
745 vm_page_release(&vm_pages[i - 1]);
746 }
747
748#if 0
749 {
750 vm_page_t xx, xxo, xxl;
751 int i, j, k, l;
752
753 j = 0; /* (BRINGUP) */
754 xxl = 0;
755
756 for( i = 0; i < vm_colors; i++ ) {
757 queue_iterate(&vm_page_queue_free[i],
758 xx,
759 vm_page_t,
760 pageq) { /* BRINGUP */
761 j++; /* (BRINGUP) */
762 if(j > vm_page_free_count) { /* (BRINGUP) */
763 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
764 }
765
766 l = vm_page_free_count - j; /* (BRINGUP) */
767 k = 0; /* (BRINGUP) */
768
769 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
770
771 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
772 k++;
773 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
774 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
775 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
776 }
777 }
778
779 xxl = xx;
780 }
781 }
782
783 if(j != vm_page_free_count) { /* (BRINGUP) */
784 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
785 }
786 }
787#endif
788
789
790 /*
791 * We have to re-align virtual_space_start,
792 * because pmap_steal_memory has been using it.
793 */
794
795 virtual_space_start = round_page_32(virtual_space_start);
796
797 *startp = virtual_space_start;
798 *endp = virtual_space_end;
799}
800#endif /* MACHINE_PAGES */
801
802/*
803 * Routine: vm_page_module_init
804 * Purpose:
805 * Second initialization pass, to be done after
806 * the basic VM system is ready.
807 */
808void
809vm_page_module_init(void)
810{
811 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
812 0, PAGE_SIZE, "vm pages");
813
814#if ZONE_DEBUG
815 zone_debug_disable(vm_page_zone);
816#endif /* ZONE_DEBUG */
817
818 zone_change(vm_page_zone, Z_EXPAND, FALSE);
819 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
820 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
821
822 /*
823 * Adjust zone statistics to account for the real pages allocated
824 * in vm_page_create(). [Q: is this really what we want?]
825 */
826 vm_page_zone->count += vm_page_pages;
827 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
828
829 mutex_init(&vm_page_alloc_lock, 0);
830}
831
832/*
833 * Routine: vm_page_create
834 * Purpose:
835 * After the VM system is up, machine-dependent code
836 * may stumble across more physical memory. For example,
837 * memory that it was reserving for a frame buffer.
838 * vm_page_create turns this memory into available pages.
839 */
840
841void
842vm_page_create(
843 ppnum_t start,
844 ppnum_t end)
845{
846 ppnum_t phys_page;
847 vm_page_t m;
848
849 for (phys_page = start;
850 phys_page < end;
851 phys_page++) {
852 while ((m = (vm_page_t) vm_page_grab_fictitious())
853 == VM_PAGE_NULL)
854 vm_page_more_fictitious();
855
856 vm_page_init(m, phys_page);
857 vm_page_pages++;
858 vm_page_release(m);
859 }
860}
861
862/*
863 * vm_page_hash:
864 *
865 * Distributes the object/offset key pair among hash buckets.
866 *
867 * NOTE: The bucket count must be a power of 2
868 */
869#define vm_page_hash(object, offset) (\
870 ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
871 & vm_page_hash_mask)
872
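/*
 * Note on the mask (illustrative): the final "& vm_page_hash_mask" reduces
 * the mixed 32-bit value to a bucket index, and is equivalent to taking it
 * modulo vm_page_bucket_count only because the bucket count is a power of
 * two (mask == count - 1).  E.g. with (1 << 20) buckets the mask is 0xfffff.
 */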
873
874/*
875 * vm_page_insert: [ internal use only ]
876 *
877 * Inserts the given mem entry into the object/offset-page
878 * table and object list.
879 *
880 * The object must be locked.
881 */
882void
883vm_page_insert(
884 vm_page_t mem,
885 vm_object_t object,
886 vm_object_offset_t offset)
887{
888 vm_page_insert_internal(mem, object, offset, FALSE);
889}
890
891
892static void
893vm_page_insert_internal(
894 vm_page_t mem,
895 vm_object_t object,
896 vm_object_offset_t offset,
897 boolean_t queues_lock_held)
898{
899 register vm_page_bucket_t *bucket;
900
901 XPR(XPR_VM_PAGE,
902 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
903 (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
904
905 VM_PAGE_CHECK(mem);
906
907 if (object == vm_submap_object) {
908 /* the vm_submap_object is only a placeholder for submaps */
909 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
910 }
911
912 vm_object_lock_assert_exclusive(object);
913#if DEBUG
914 if (mem->tabled || mem->object != VM_OBJECT_NULL)
915 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
916 "already in (obj=%p,off=0x%llx)",
917 mem, object, offset, mem->object, mem->offset);
918#endif
919 assert(!object->internal || offset < object->size);
920
921 /* only insert "pageout" pages into "pageout" objects,
922 * and normal pages into normal objects */
923 assert(object->pageout == mem->pageout);
924
925 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
926
927 /*
928 * Record the object/offset pair in this page
929 */
930
931 mem->object = object;
932 mem->offset = offset;
933
934 /*
935 * Insert it into the object/offset hash table
936 */
937
938 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
939 simple_lock(&vm_page_bucket_lock);
940 mem->next = bucket->pages;
941 bucket->pages = mem;
942#if MACH_PAGE_HASH_STATS
943 if (++bucket->cur_count > bucket->hi_count)
944 bucket->hi_count = bucket->cur_count;
945#endif /* MACH_PAGE_HASH_STATS */
946 simple_unlock(&vm_page_bucket_lock);
947
948 /*
949 * Now link into the object's list of backed pages.
950 */
951
952 VM_PAGE_INSERT(mem, object);
953 mem->tabled = TRUE;
954
955 /*
956 * Show that the object has one more resident page.
957 */
958
959 object->resident_page_count++;
960
961 if (object->purgable == VM_PURGABLE_VOLATILE ||
962 object->purgable == VM_PURGABLE_EMPTY) {
963 if (queues_lock_held == FALSE)
964 vm_page_lockspin_queues();
965
966 vm_page_purgeable_count++;
967
968 if (queues_lock_held == FALSE)
969 vm_page_unlock_queues();
970 }
971}
972
973/*
974 * vm_page_replace:
975 *
976 * Exactly like vm_page_insert, except that we first
977 * remove any existing page at the given offset in object.
978 *
979 * The object and page queues must be locked.
980 */
981
982void
983vm_page_replace(
984 register vm_page_t mem,
985 register vm_object_t object,
986 register vm_object_offset_t offset)
987{
988 vm_page_bucket_t *bucket;
989 vm_page_t found_m = VM_PAGE_NULL;
990
991 VM_PAGE_CHECK(mem);
992 vm_object_lock_assert_exclusive(object);
993#if DEBUG
994 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
995
996 if (mem->tabled || mem->object != VM_OBJECT_NULL)
997 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
998 "already in (obj=%p,off=0x%llx)",
999 mem, object, offset, mem->object, mem->offset);
1000#endif
1001 /*
1002 * Record the object/offset pair in this page
1003 */
1004
1005 mem->object = object;
1006 mem->offset = offset;
1007
1008 /*
1009 * Insert it into the object/offset hash table,
1010 * replacing any page that might have been there.
1011 */
1012
1013 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1014 simple_lock(&vm_page_bucket_lock);
1015
1016 if (bucket->pages) {
1017 vm_page_t *mp = &bucket->pages;
1018 register vm_page_t m = *mp;
1019
1020 do {
1021 if (m->object == object && m->offset == offset) {
1022 /*
1023 * Remove old page from hash list
1024 */
1025 *mp = m->next;
1026
1027 found_m = m;
1028 break;
1029 }
1030 mp = &m->next;
1031 } while ((m = *mp));
1032
1033 mem->next = bucket->pages;
1034 } else {
1035 mem->next = VM_PAGE_NULL;
1036 }
1037 /*
1038 * insert new page at head of hash list
1039 */
1040 bucket->pages = mem;
1041
1042 simple_unlock(&vm_page_bucket_lock);
1043
1044 if (found_m) {
1045 /*
1046 * there was already a page at the specified
1047 * offset for this object... remove it from
1048 * the object and free it back to the free list
1049 */
1050 VM_PAGE_REMOVE(found_m);
1051 found_m->tabled = FALSE;
1052
1053 found_m->object = VM_OBJECT_NULL;
1054 found_m->offset = (vm_object_offset_t) -1;
1055 object->resident_page_count--;
1056
1057 if (object->purgable == VM_PURGABLE_VOLATILE ||
1058 object->purgable == VM_PURGABLE_EMPTY) {
1059 assert(vm_page_purgeable_count > 0);
1060 vm_page_purgeable_count--;
1061 }
1062
1063 /*
1064 * Return page to the free list.
1065 * Note the page is not tabled now
1066 */
1067 vm_page_free(found_m);
1068 }
1069 /*
1070 * Now link into the object's list of backed pages.
1071 */
1072
1073 VM_PAGE_INSERT(mem, object);
1074 mem->tabled = TRUE;
1075
1076 /*
1077 * And show that the object has one more resident
1078 * page.
1079 */
1080
1081 object->resident_page_count++;
1082
1083 if (object->purgable == VM_PURGABLE_VOLATILE ||
1084 object->purgable == VM_PURGABLE_EMPTY) {
1085 vm_page_purgeable_count++;
1086 }
1087}
1088
1089/*
1090 * vm_page_remove: [ internal use only ]
1091 *
1092 * Removes the given mem entry from the object/offset-page
1093 * table and the object page list.
1094 *
1095 * The object and page queues must be locked.
1096 */
1097
1098void
1099vm_page_remove(
1100 register vm_page_t mem)
1101{
1102 register vm_page_bucket_t *bucket;
1103 register vm_page_t this;
1104
1105 XPR(XPR_VM_PAGE,
1106 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1107 (integer_t)mem->object, (integer_t)mem->offset,
1108 (integer_t)mem, 0,0);
1109#if DEBUG
1110 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1111#endif
1112 vm_object_lock_assert_exclusive(mem->object);
1113 assert(mem->tabled);
1114 assert(!mem->cleaning);
1115 VM_PAGE_CHECK(mem);
1116
1117
1118 /*
1119 * Remove from the object/offset hash table
1120 */
1121
1122 bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
1123 simple_lock(&vm_page_bucket_lock);
1124 if ((this = bucket->pages) == mem) {
1125 /* optimize for common case */
1126
1127 bucket->pages = mem->next;
1128 } else {
1129 register vm_page_t *prev;
1130
1131 for (prev = &this->next;
1132 (this = *prev) != mem;
1133 prev = &this->next)
1134 continue;
1135 *prev = this->next;
1136 }
1137#if MACH_PAGE_HASH_STATS
1138 bucket->cur_count--;
1139#endif /* MACH_PAGE_HASH_STATS */
1140 simple_unlock(&vm_page_bucket_lock);
1141
1142 /*
1143 * Now remove from the object's list of backed pages.
1144 */
1145
1146 VM_PAGE_REMOVE(mem);
1147
1148 /*
1149 * And show that the object has one fewer resident
1150 * page.
1151 */
1152
1153 mem->object->resident_page_count--;
1154
1155 if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1156 mem->object->purgable == VM_PURGABLE_EMPTY) {
1157 assert(vm_page_purgeable_count > 0);
1158 vm_page_purgeable_count--;
1159 }
1160 mem->tabled = FALSE;
1161 mem->object = VM_OBJECT_NULL;
1162 mem->offset = (vm_object_offset_t) -1;
1163}
1164
1165/*
1166 * vm_page_lookup:
1167 *
1168 * Returns the page associated with the object/offset
1169 * pair specified; if none is found, VM_PAGE_NULL is returned.
1170 *
1171 * The object must be locked. No side effects.
1172 */
1173
1174unsigned long vm_page_lookup_hint = 0;
1175unsigned long vm_page_lookup_hint_next = 0;
1176unsigned long vm_page_lookup_hint_prev = 0;
1177unsigned long vm_page_lookup_hint_miss = 0;
1178unsigned long vm_page_lookup_bucket_NULL = 0;
1179unsigned long vm_page_lookup_miss = 0;
1180
1181
1182vm_page_t
1183vm_page_lookup(
1184 register vm_object_t object,
1185 register vm_object_offset_t offset)
1186{
1187 register vm_page_t mem;
1188 register vm_page_bucket_t *bucket;
1189 queue_entry_t qe;
1190
1191 vm_object_lock_assert_held(object);
1192 mem = object->memq_hint;
1193
1194 if (mem != VM_PAGE_NULL) {
1195 assert(mem->object == object);
1196
1197 if (mem->offset == offset) {
1198 vm_page_lookup_hint++;
1199 return mem;
1200 }
1201 qe = queue_next(&mem->listq);
1202
1203 if (! queue_end(&object->memq, qe)) {
1204 vm_page_t next_page;
1205
1206 next_page = (vm_page_t) qe;
1207 assert(next_page->object == object);
1208
1209 if (next_page->offset == offset) {
1210 vm_page_lookup_hint_next++;
1211 object->memq_hint = next_page; /* new hint */
1212 return next_page;
1213 }
1214 }
1215 qe = queue_prev(&mem->listq);
1216
1217 if (! queue_end(&object->memq, qe)) {
1218 vm_page_t prev_page;
1219
1220 prev_page = (vm_page_t) qe;
1221 assert(prev_page->object == object);
1222
1223 if (prev_page->offset == offset) {
1224 vm_page_lookup_hint_prev++;
1225 object->memq_hint = prev_page; /* new hint */
1226 return prev_page;
1227 }
1228 }
1229 }
1230 /*
1231 * Search the hash table for this object/offset pair
1232 */
1233 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1234
1235 /*
1236 * since we hold the object lock, we are guaranteed that no
1237 * new pages can be inserted into this object... this in turn
1238 * guarantees that the page we're looking for can't exist
1239 * if the bucket it hashes to is currently NULL even when looked
1240 * at outside the scope of the hash bucket lock... this is a
1241 * really cheap optimization to avoid taking the lock
1242 */
1243 if (bucket->pages == VM_PAGE_NULL) {
1244 vm_page_lookup_bucket_NULL++;
1245
1246 return (VM_PAGE_NULL);
1247 }
1248 simple_lock(&vm_page_bucket_lock);
1249
1250 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1251 VM_PAGE_CHECK(mem);
1252 if ((mem->object == object) && (mem->offset == offset))
1253 break;
1254 }
1255 simple_unlock(&vm_page_bucket_lock);
1256
1257 if (mem != VM_PAGE_NULL) {
1258 if (object->memq_hint != VM_PAGE_NULL) {
1259 vm_page_lookup_hint_miss++;
1260 }
1261 assert(mem->object == object);
1262 object->memq_hint = mem;
1263 } else
1264 vm_page_lookup_miss++;
1265
1266 return(mem);
1267}
1268
1269
1270/*
1271 * vm_page_rename:
1272 *
1273 * Move the given memory entry from its
1274 * current object to the specified target object/offset.
1275 *
1276 * The object must be locked.
1277 */
1278void
1279vm_page_rename(
1280 register vm_page_t mem,
1281 register vm_object_t new_object,
1282 vm_object_offset_t new_offset,
1283 boolean_t encrypted_ok)
1284{
1285 assert(mem->object != new_object);
1286
1287 /*
1288 * ENCRYPTED SWAP:
1289 * The encryption key is based on the page's memory object
1290 * (aka "pager") and paging offset. Moving the page to
1291 * another VM object changes its "pager" and "paging_offset"
1292 * so it has to be decrypted first, or we would lose the key.
1293 *
1294 * One exception is VM object collapsing, where we transfer pages
1295 * from one backing object to its parent object. This operation also
1296 * transfers the paging information, so the <pager,paging_offset> info
1297 * should remain consistent. The caller (vm_object_do_collapse())
1298 * sets "encrypted_ok" in this case.
1299 */
1300 if (!encrypted_ok && mem->encrypted) {
1301 panic("vm_page_rename: page %p is encrypted\n", mem);
1302 }
1303
1304 /*
1305 * Changes to mem->object require the page lock because
1306 * the pageout daemon uses that lock to get the object.
1307 */
1308
1309 XPR(XPR_VM_PAGE,
1310 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1311 (integer_t)new_object, (integer_t)new_offset,
1312 (integer_t)mem, 0,0);
1313
1314 vm_page_lockspin_queues();
1315 vm_page_remove(mem);
1316 vm_page_insert(mem, new_object, new_offset);
1317 vm_page_unlock_queues();
1318}
1319
1320/*
1321 * vm_page_init:
1322 *
1323 * Initialize the fields in a new page.
1324 * This takes a structure with random values and initializes it
1325 * so that it can be given to vm_page_release or vm_page_insert.
1326 */
1327void
1328vm_page_init(
1329 vm_page_t mem,
1330 ppnum_t phys_page)
1331{
1332 assert(phys_page);
1333 *mem = vm_page_template;
1334 mem->phys_page = phys_page;
1335}
1336
1337/*
1338 * vm_page_grab_fictitious:
1339 *
1340 * Remove a fictitious page from the free list.
1341 * Returns VM_PAGE_NULL if there are no free pages.
1342 */
1343int c_vm_page_grab_fictitious = 0;
1344int c_vm_page_release_fictitious = 0;
1345int c_vm_page_more_fictitious = 0;
1346
1347extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
1348
1349vm_page_t
1350vm_page_grab_fictitious_common(
1351 vm_offset_t phys_addr)
1352{
1353 register vm_page_t m;
1354
1355 m = (vm_page_t)zget(vm_page_zone);
1356 if (m) {
1357 vm_page_init(m, phys_addr);
1358 m->fictitious = TRUE;
1359 }
1360
1361 c_vm_page_grab_fictitious++;
1362 return m;
1363}
1364
1365vm_page_t
1366vm_page_grab_fictitious(void)
1367{
1368 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1369}
1370
1371vm_page_t
1372vm_page_grab_guard(void)
1373{
1374 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1375}
1376
1377/*
1378 * vm_page_release_fictitious:
1379 *
1380 * Release a fictitious page to the free list.
1381 */
1382
1383void
1384vm_page_release_fictitious(
1385 register vm_page_t m)
1386{
1387 assert(!m->free);
1388 assert(m->busy);
1389 assert(m->fictitious);
1390 assert(m->phys_page == vm_page_fictitious_addr ||
1391 m->phys_page == vm_page_guard_addr);
1392
1393 c_vm_page_release_fictitious++;
1394#if DEBUG
1395 if (m->free)
1396 panic("vm_page_release_fictitious");
1397#endif
1398 m->free = TRUE;
1399 zfree(vm_page_zone, m);
1400}
1401
1402/*
1403 * vm_page_more_fictitious:
1404 *
1405 * Add more fictitious pages to the free list.
1406 * Allowed to block. This routine is way intimate
1407 * with the zones code, for several reasons:
1408 * 1. we need to carve some page structures out of physical
1409 * memory before zones work, so they _cannot_ come from
1410 * the zone_map.
1411 * 2. the zone needs to be collectable in order to prevent
1412 * growth without bound. These structures are used by
1413 * the device pager (by the hundreds and thousands), as
1414 * private pages for pageout, and as blocking pages for
1415 * pagein. Temporary bursts in demand should not result in
1416 * permanent allocation of a resource.
1417 * 3. To smooth allocation humps, we allocate single pages
1418 * with kernel_memory_allocate(), and cram them into the
1419 * zone. This also allows us to initialize the vm_page_t's
1420 * on the way into the zone, so that zget() always returns
1421 * an initialized structure. The zone free element pointer
1422 * and the free page pointer are both the first item in the
1423 * vm_page_t.
1424 * 4. By having the pages in the zone pre-initialized, we need
1425 * not keep 2 levels of lists. The garbage collector simply
1426 * scans our list, and reduces physical memory usage as it
1427 * sees fit.
1428 */
1429
1430void vm_page_more_fictitious(void)
1431{
1432 register vm_page_t m;
1433 vm_offset_t addr;
1434 kern_return_t retval;
1435 int i;
1436
1437 c_vm_page_more_fictitious++;
1438
1439 /*
1440 * Allocate a single page from the zone_map. Do not wait if no physical
1441 * pages are immediately available, and do not zero the space. We need
1442 * our own blocking lock here to prevent having multiple,
1443 * simultaneous requests from piling up on the zone_map lock. Exactly
1444 * one (of our) threads should be potentially waiting on the map lock.
1445 * If winner is not vm-privileged, then the page allocation will fail,
1446 * and it will temporarily block here in the vm_page_wait().
1447 */
1448 mutex_lock(&vm_page_alloc_lock);
1449 /*
1450 * If another thread allocated space, just bail out now.
1451 */
1452 if (zone_free_count(vm_page_zone) > 5) {
1453 /*
1454 * The number "5" is a small number that is larger than the
1455 * number of fictitious pages that any single caller will
1456 * attempt to allocate. Otherwise, a thread will attempt to
1457 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1458 * release all of the resources and locks already acquired,
1459 * and then call this routine. This routine finds the pages
1460 * that the caller released, so fails to allocate new space.
1461 * The process repeats infinitely. The largest known number
1462 * of fictitious pages required in this manner is 2. 5 is
1463 * simply a somewhat larger number.
1464 */
1465 mutex_unlock(&vm_page_alloc_lock);
1466 return;
1467 }
1468
1469 retval = kernel_memory_allocate(zone_map,
1470 &addr, PAGE_SIZE, VM_PROT_ALL,
1471 KMA_KOBJECT|KMA_NOPAGEWAIT);
1472 if (retval != KERN_SUCCESS) {
1473 /*
1474 * No page was available. Tell the pageout daemon, drop the
1475 * lock to give another thread a chance at it, and
1476 * wait for the pageout daemon to make progress.
1477 */
1478 mutex_unlock(&vm_page_alloc_lock);
1479 vm_page_wait(THREAD_UNINT);
1480 return;
1481 }
1482 /*
1483 * Initialize as many vm_page_t's as will fit on this page. This
1484 * depends on the zone code disturbing ONLY the first item of
1485 * each zone element.
1486 */
1487 m = (vm_page_t)addr;
1488 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1489 vm_page_init(m, vm_page_fictitious_addr);
1490 m->fictitious = TRUE;
1491 m++;
1492 }
1493 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1494 mutex_unlock(&vm_page_alloc_lock);
1495}
1496
1497
1498/*
1499 * vm_pool_low():
1500 *
1501 * Return true if it is not likely that a non-vm_privileged thread
1502 * can get memory without blocking. Advisory only, since the
1503 * situation may change under us.
1504 */
1505int
1506vm_pool_low(void)
1507{
1508 /* No locking, at worst we will fib. */
1509 return( vm_page_free_count < vm_page_free_reserved );
1510}
1511
1512
1513
1514/*
1515 * this is an interface to support bring-up of drivers
1516 * on platforms with physical memory > 4G...
1517 */
1518int vm_himemory_mode = 0;
1519
1520
1521/*
1522 * this interface exists to support hardware controllers
1523 * incapable of generating DMAs with more than 32 bits
1524 * of address on platforms with physical memory > 4G...
1525 */
1526unsigned int vm_lopage_free_count = 0;
1527unsigned int vm_lopage_max_count = 0;
1528queue_head_t vm_lopage_queue_free;
1529
1530vm_page_t
1531vm_page_grablo(void)
1532{
1533 register vm_page_t mem;
1534 unsigned int vm_lopage_alloc_count;
1535
1536 if (vm_lopage_poolsize == 0)
1537 return (vm_page_grab());
1538
1539 mutex_lock(&vm_page_queue_free_lock);
1540
1541 if (! queue_empty(&vm_lopage_queue_free)) {
1542 queue_remove_first(&vm_lopage_queue_free,
1543 mem,
1544 vm_page_t,
1545 pageq);
1546 assert(mem->free);
1547 assert(mem->busy);
1548 assert(!mem->pmapped);
1549
1550 mem->pageq.next = NULL;
1551 mem->pageq.prev = NULL;
1552 mem->free = FALSE;
1553
1554 vm_lopage_free_count--;
1555 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1556 if (vm_lopage_alloc_count > vm_lopage_max_count)
1557 vm_lopage_max_count = vm_lopage_alloc_count;
1558 } else {
1559 mem = VM_PAGE_NULL;
1560 }
1561 mutex_unlock(&vm_page_queue_free_lock);
1562
1563 return (mem);
1564}
1565
1566
1567/*
1568 * vm_page_grab:
1569 *
1570 * first try to grab a page from the per-cpu free list...
1571 * this must be done while pre-emption is disabled... if
1572 * a page is available, we're done...
1573 * if no page is available, grab the vm_page_queue_free_lock
1574 * and see if current number of free pages would allow us
1575 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1576 * if there are pages available, disable preemption and
1577 * recheck the state of the per-cpu free list... we could
1578 * have been preempted and moved to a different cpu, or
1579 * some other thread could have re-filled it... if still
1580 * empty, figure out how many pages we can steal from the
1581 * global free queue and move to the per-cpu queue...
1582 * return 1 of these pages when done... only wakeup the
1583 * pageout_scan thread if we moved pages from the global
1584 * list... no need for the wakeup if we've satisfied the
1585 * request from the per-cpu queue.
1586 */
1587
1588#define COLOR_GROUPS_TO_STEAL 4
1589
1590
1591vm_page_t
1592vm_page_grab( void )
1593{
1594 vm_page_t mem;
1595
1596
1597 disable_preemption();
1598
1599 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1600return_page_from_cpu_list:
1601 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1602 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1603 mem->pageq.next = NULL;
1604
1605 enable_preemption();
1606
1607 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1608 assert(mem->tabled == FALSE);
1609 assert(mem->object == VM_OBJECT_NULL);
1610 assert(!mem->laundry);
1611 assert(!mem->free);
1612 assert(pmap_verify_free(mem->phys_page));
1613 assert(mem->busy);
1614 assert(!mem->encrypted);
1615 assert(!mem->pmapped);
1616
1617 return mem;
1618 }
1619 enable_preemption();
1620
1621
1622 mutex_lock(&vm_page_queue_free_lock);
1623
1624 /*
1625 * Optionally produce warnings if the wire or gobble
1626 * counts exceed some threshold.
1627 */
1628 if (vm_page_wire_count_warning > 0
1629 && vm_page_wire_count >= vm_page_wire_count_warning) {
1630 printf("mk: vm_page_grab(): high wired page count of %d\n",
1631 vm_page_wire_count);
1632 assert(vm_page_wire_count < vm_page_wire_count_warning);
1633 }
1634 if (vm_page_gobble_count_warning > 0
1635 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1636 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1637 vm_page_gobble_count);
1638 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1639 }
1640
1641 /*
1642 * Only let privileged threads (involved in pageout)
1643 * dip into the reserved pool.
1644 */
1645 if ((vm_page_free_count < vm_page_free_reserved) &&
1646 !(current_thread()->options & TH_OPT_VMPRIV)) {
1647 mutex_unlock(&vm_page_queue_free_lock);
1648 mem = VM_PAGE_NULL;
1649 }
1650 else {
1651 vm_page_t head;
1652 vm_page_t tail;
1653 unsigned int pages_to_steal;
1654 unsigned int color;
1655
1656 while ( vm_page_free_count == 0 ) {
1657
1658 mutex_unlock(&vm_page_queue_free_lock);
1659 /*
1660 * must be a privileged thread to be
1661 * in this state since a non-privileged
1662 * thread would have bailed if we were
1663 * under the vm_page_free_reserved mark
1664 */
1665 VM_PAGE_WAIT();
1666 mutex_lock(&vm_page_queue_free_lock);
1667 }
1668
1669 disable_preemption();
1670
1671 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1672 mutex_unlock(&vm_page_queue_free_lock);
1673
1674 /*
1675 * we got preempted and moved to another processor
1676 * or we got preempted and someone else ran and filled the cache
1677 */
1678 goto return_page_from_cpu_list;
1679 }
1680 if (vm_page_free_count <= vm_page_free_reserved)
1681 pages_to_steal = 1;
1682 else {
1683 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1684
1685 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1686 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1687 }
1688 color = PROCESSOR_DATA(current_processor(), start_color);
1689 head = tail = NULL;
1690
1691 while (pages_to_steal--) {
1692 if (--vm_page_free_count < vm_page_free_count_minimum)
1693 vm_page_free_count_minimum = vm_page_free_count;
1694
1695 while (queue_empty(&vm_page_queue_free[color]))
1696 color = (color + 1) & vm_color_mask;
1697
1698 queue_remove_first(&vm_page_queue_free[color],
1699 mem,
1700 vm_page_t,
1701 pageq);
1702 mem->pageq.next = NULL;
1703 mem->pageq.prev = NULL;
1704
1705 color = (color + 1) & vm_color_mask;
1706
1707 if (head == NULL)
1708 head = mem;
1709 else
1710 tail->pageq.next = (queue_t)mem;
1711 tail = mem;
1712
1713 mem->pageq.prev = NULL;
1714 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1715 assert(mem->tabled == FALSE);
1716 assert(mem->object == VM_OBJECT_NULL);
1717 assert(!mem->laundry);
1718 assert(mem->free);
1719 mem->free = FALSE;
1720
1721 assert(pmap_verify_free(mem->phys_page));
1722 assert(mem->busy);
1723 assert(!mem->free);
1724 assert(!mem->encrypted);
1725 assert(!mem->pmapped);
1726 }
1727 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1728 PROCESSOR_DATA(current_processor(), start_color) = color;
1729
1730 /*
1731 * satisfy this request
1732 */
1733 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1734 mem = head;
1735 mem->pageq.next = NULL;
1736
1737 mutex_unlock(&vm_page_queue_free_lock);
1738
1739 enable_preemption();
1740 }
1741 /*
1742 * Decide if we should poke the pageout daemon.
1743 * We do this if the free count is less than the low
1744 * water mark, or if the free count is less than the high
1745 * water mark (but above the low water mark) and the inactive
1746 * count is less than its target.
1747 *
1748 * We don't have the counts locked ... if they change a little,
1749 * it doesn't really matter.
1750 */
1751 if ((vm_page_free_count < vm_page_free_min) ||
1752 ((vm_page_free_count < vm_page_free_target) &&
1753 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1754 thread_wakeup((event_t) &vm_page_free_wanted);
1755
1756#if CONFIG_EMBEDDED
1757 {
1758 int percent_avail;
1759
1760 /*
1761 * Decide if we need to poke the memorystatus notification thread.
1762 */
1763 percent_avail =
1764 (vm_page_active_count + vm_page_inactive_count +
1765 vm_page_speculative_count + vm_page_free_count +
1766 vm_page_purgeable_count ) * 100 /
1767 atop_64(max_mem);
1768 if (percent_avail <= (kern_memorystatus_level - 5)) {
1769 kern_memorystatus_level = percent_avail;
1770 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1771 }
1772 }
1773#endif
1774
1775// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1776
1777 return mem;
1778}
1779
1780/*
1781 * vm_page_release:
1782 *
1783 * Return a page to the free list.
1784 */
1785
1786void
1787vm_page_release(
1788 register vm_page_t mem)
1789{
1790 unsigned int color;
1791#if 0
1792 unsigned int pindex;
1793 phys_entry *physent;
1794
1795 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1796 if(physent->ppLink & ppN) { /* (BRINGUP) */
1797 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1798 }
1799 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1800#endif
1801 assert(!mem->private && !mem->fictitious);
1802
1803// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1804
1805 mutex_lock(&vm_page_queue_free_lock);
1806#if DEBUG
1807 if (mem->free)
1808 panic("vm_page_release");
1809#endif
1810 mem->free = TRUE;
1811
1812 assert(mem->busy);
1813 assert(!mem->laundry);
1814 assert(mem->object == VM_OBJECT_NULL);
1815 assert(mem->pageq.next == NULL &&
1816 mem->pageq.prev == NULL);
1817 assert(mem->listq.next == NULL &&
1818 mem->listq.prev == NULL);
1819
1820 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1821 /*
1822 * this exists to support hardware controllers
1823 * incapable of generating DMAs with more than 32 bits
1824 * of address on platforms with physical memory > 4G...
1825 */
1826 queue_enter_first(&vm_lopage_queue_free,
1827 mem,
1828 vm_page_t,
1829 pageq);
1830 vm_lopage_free_count++;
1831 } else {
1832 color = mem->phys_page & vm_color_mask;
1833 queue_enter_first(&vm_page_queue_free[color],
1834 mem,
1835 vm_page_t,
1836 pageq);
1837 vm_page_free_count++;
1838 /*
1839 * Check if we should wake up someone waiting for a page.
1840 * But don't bother waking them unless they can allocate.
1841 *
1842 * We wakeup only one thread, to prevent starvation.
1843 * Because the scheduling system handles wait queues FIFO,
1844 * if we wakeup all waiting threads, one greedy thread
1845 * can starve multiple niceguy threads. When the threads
1846 * all wake up, the greedy thread runs first, grabs the page,
1847 * and waits for another page. It will be the first to run
1848 * when the next page is freed.
1849 *
1850 * However, there is a slight danger here.
1851 * The thread we wake might not use the free page.
1852 * Then the other threads could wait indefinitely
1853 * while the page goes unused. To forestall this,
1854 * the pageout daemon will keep making free pages
1855 * as long as vm_page_free_wanted is non-zero.
1856 */
1857
1858 if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
1859 vm_page_free_wanted_privileged--;
1860 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
1861 } else if ((vm_page_free_wanted > 0) &&
1862 (vm_page_free_count >= vm_page_free_reserved)) {
1863 vm_page_free_wanted--;
1864 thread_wakeup_one((event_t) &vm_page_free_count);
1865 }
1866 }
1867 mutex_unlock(&vm_page_queue_free_lock);
1868
1869#if CONFIG_EMBEDDED
1870 {
1871 int percent_avail;
1872
1873 /*
1874 * Decide if we need to poke the memorystatus notification thread.
1875 * Locking is not a big issue, as only a single thread delivers these.
1876 */
1877 percent_avail =
1878 (vm_page_active_count + vm_page_inactive_count +
1879 vm_page_speculative_count + vm_page_free_count +
1880 vm_page_purgeable_count ) * 100 /
1881 atop_64(max_mem);
1882 if (percent_avail >= (kern_memorystatus_level + 5)) {
1883 kern_memorystatus_level = percent_avail;
1884 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1885 }
1886 }
1887#endif
1888}
1889
1890/*
1891 * vm_page_wait:
1892 *
1893 * Wait for a page to become available.
1894 * If there are plenty of free pages, then we don't sleep.
1895 *
1896 * Returns:
1897 * TRUE: There may be another page, try again
1898 * FALSE: We were interrupted out of our wait, don't try again
1899 */
1900
1901boolean_t
1902vm_page_wait(
1903 int interruptible )
1904{
1905 /*
1906 * We can't use vm_page_free_reserved to make this
1907 * determination. Consider: some thread might
1908 * need to allocate two pages. The first allocation
1909 * succeeds, the second fails. After the first page is freed,
1910 * a call to vm_page_wait must really block.
1911 */
1912 kern_return_t wait_result;
1913 int need_wakeup = 0;
1914 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1915
1916 mutex_lock(&vm_page_queue_free_lock);
1917
1918 if (is_privileged && vm_page_free_count) {
1919 mutex_unlock(&vm_page_queue_free_lock);
1920 return TRUE;
1921 }
1922 if (vm_page_free_count < vm_page_free_target) {
1923
1924 if (is_privileged) {
1925 if (vm_page_free_wanted_privileged++ == 0)
1926 need_wakeup = 1;
1927 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
1928 } else {
1929 if (vm_page_free_wanted++ == 0)
1930 need_wakeup = 1;
1931 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1932 }
1933 mutex_unlock(&vm_page_queue_free_lock);
1934 counter(c_vm_page_wait_block++);
1935
1936 if (need_wakeup)
1937 thread_wakeup((event_t)&vm_page_free_wanted);
1938
1939 if (wait_result == THREAD_WAITING)
1940 wait_result = thread_block(THREAD_CONTINUE_NULL);
1941
1942 return(wait_result == THREAD_AWAKENED);
1943 } else {
1944 mutex_unlock(&vm_page_queue_free_lock);
1945 return TRUE;
1946 }
1947}
1948
1949/*
1950 * vm_page_alloc:
1951 *
1952 * Allocate and return a memory cell associated
1953 * with this VM object/offset pair.
1954 *
1955 * Object must be locked.
1956 */
1957
1958vm_page_t
1959vm_page_alloc(
1960 vm_object_t object,
1961 vm_object_offset_t offset)
1962{
1963 register vm_page_t mem;
1964
1965 vm_object_lock_assert_exclusive(object);
1966 mem = vm_page_grab();
1967 if (mem == VM_PAGE_NULL)
1968 return VM_PAGE_NULL;
1969
1970 vm_page_insert(mem, object, offset);
1971
1972 return(mem);
1973}
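
/*
 * A minimal sketch of the usual calling pattern for vm_page_alloc() and
 * vm_page_wait() above. The helper name "example_fill_page" and its policy
 * choices are hypothetical; error handling and races with other allocators
 * at the same offset are ignored.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_fill_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	vm_object_lock(object);			/* vm_page_alloc() requires the object locked */
	while ((mem = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		vm_object_unlock(object);	/* never block for pages with the object locked */
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return KERN_ABORTED;	/* interrupted out of the wait */
		vm_object_lock(object);
	}
	vm_page_zero_fill(mem);			/* page comes back busy; initialize it... */
	PAGE_WAKEUP_DONE(mem);			/* ...then clear busy and wake any waiters */
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif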
1974
1975vm_page_t
1976vm_page_alloclo(
1977 vm_object_t object,
1978 vm_object_offset_t offset)
1979{
1980 register vm_page_t mem;
1981
1982 vm_object_lock_assert_exclusive(object);
1983 mem = vm_page_grablo();
1984 if (mem == VM_PAGE_NULL)
1985 return VM_PAGE_NULL;
1986
1987 vm_page_insert(mem, object, offset);
1988
1989 return(mem);
1990}
1991
1992
1993/*
1994 * vm_page_alloc_guard:
1995 *
1996 * Allocate a fictitious page which will be used
1997 * as a guard page. The page will be inserted into
1998 * the object and returned to the caller.
1999 */
2000
2001vm_page_t
2002vm_page_alloc_guard(
2003 vm_object_t object,
2004 vm_object_offset_t offset)
2005{
2006 register vm_page_t mem;
2007
2008 vm_object_lock_assert_exclusive(object);
2009 mem = vm_page_grab_guard();
2010 if (mem == VM_PAGE_NULL)
2011 return VM_PAGE_NULL;
2012
2013 vm_page_insert(mem, object, offset);
2014
2015 return(mem);
2016}
2017
2018
2019counter(unsigned int c_laundry_pages_freed = 0;)
2020
2021boolean_t vm_page_free_verify = TRUE;
2022/*
2023 * vm_page_free:
2024 *
2025 * Returns the given page to the free list,
2026 * disassociating it from any VM object.
2027 *
2028 * Object and page queues must be locked prior to entry.
2029 */
2030void
2031vm_page_free_prepare(
2032 register vm_page_t mem)
2033{
2034 VM_PAGE_CHECK(mem);
2035 assert(!mem->free);
2036 assert(!mem->cleaning);
2037 assert(!mem->pageout);
2038
2039#if DEBUG
2040 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2041 assert(pmap_verify_free(mem->phys_page));
2042 }
2043 if (mem->object)
2044 vm_object_lock_assert_exclusive(mem->object);
2045 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2046
2047 if (mem->free)
2048 panic("vm_page_free: freeing page on free list\n");
2049#endif
2050
2051 if (mem->laundry) {
2052 /*
2053 * We may have to free a page while it's being laundered
2054 * if we lost its pager (due to a forced unmount, for example).
2055 * We need to call vm_pageout_throttle_up() before removing
2056 * the page from its VM object, so that we can find out on
2057 * which pageout queue the page is.
2058 */
2059 vm_pageout_throttle_up(mem);
2060 counter(++c_laundry_pages_freed);
2061 }
2062
2063 if (mem->tabled)
2064 vm_page_remove(mem); /* clears tabled, object, offset */
2065
2066 VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */
2067
2068 if (mem->wire_count) {
2069 if (!mem->private && !mem->fictitious)
2070 vm_page_wire_count--;
2071 mem->wire_count = 0;
2072 assert(!mem->gobbled);
2073 } else if (mem->gobbled) {
2074 if (!mem->private && !mem->fictitious)
2075 vm_page_wire_count--;
2076 vm_page_gobble_count--;
2077 }
2078 mem->gobbled = FALSE;
2079
2080 PAGE_WAKEUP(mem); /* clears wanted */
2081
2082 /* Some of these may be unnecessary */
2083 mem->busy = TRUE;
2084 mem->absent = FALSE;
2085 mem->error = FALSE;
2086 mem->dirty = FALSE;
2087 mem->precious = FALSE;
2088 mem->reference = FALSE;
2089 mem->encrypted = FALSE;
2090 mem->encrypted_cleaning = FALSE;
2091 mem->deactivated = FALSE;
2092 mem->pmapped = FALSE;
2093
2094 if (mem->private) {
2095 mem->private = FALSE;
2096 mem->fictitious = TRUE;
2097 mem->phys_page = vm_page_fictitious_addr;
2098 }
2099 if (!mem->fictitious) {
2100 if (mem->zero_fill == TRUE) {
2101 mem->zero_fill = FALSE;
2102 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2103 }
2104 vm_page_init(mem, mem->phys_page);
2105 }
2106}
2107
2108void
2109vm_page_free(
2110 vm_page_t mem)
2111{
2112 vm_page_free_prepare(mem);
2113 if (mem->fictitious) {
2114 vm_page_release_fictitious(mem);
2115 } else {
2116 vm_page_release(mem);
2117 }
2118}
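
/*
 * A minimal sketch of the locking a caller of vm_page_free() is expected to
 * hold (the page's object lock plus the page queue lock, per the comment
 * above); the helper name "example_discard_page" is hypothetical.
 */
#if 0	/* illustration only -- never compiled */
static void
example_discard_page(
	vm_object_t	object,
	vm_page_t	mem)
{
	vm_object_lock(object);		/* page is tabled in this object */
	vm_page_lock_queues();		/* vm_page_free() manipulates the paging queues */
	vm_page_free(mem);		/* unhooks from object and queues, then releases */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif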
2119
2120/*
2121 * Free a list of pages. The list can be up to several hundred pages,
2122 * as blocked up by vm_pageout_scan().
2123 * The big win is not having to take the page q and free list locks once
2124 * per page. We sort the incoming pages into n lists, one for
2125 * each color.
2126 *
2127 * The page queues must be locked, and are kept locked.
2128 */
2129void
2130vm_page_free_list(
2131 vm_page_t mem)
2132{
2133 vm_page_t nxt;
2134 int pg_count = 0;
2135 int color;
2136 int inuse_list_head = -1;
2137
2138 queue_head_t free_list[MAX_COLORS];
2139 int inuse[MAX_COLORS];
2140
2141 for (color = 0; color < (signed) vm_colors; color++) {
2142 queue_init(&free_list[color]);
2143 }
2144
2145#if DEBUG
2146 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2147#endif
2148 while (mem) {
2149#if DEBUG
2150 if (mem->tabled || mem->object)
2151 panic("vm_page_free_list: freeing tabled page\n");
2152 if (mem->inactive || mem->active || mem->throttled || mem->free)
2153 panic("vm_page_free_list: freeing page on list\n");
2154 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2155 assert(pmap_verify_free(mem->phys_page));
2156 }
2157#endif
2158 assert(mem->pageq.prev == NULL);
2159 assert(mem->busy);
2160 assert(!mem->free);
2161 nxt = (vm_page_t)(mem->pageq.next);
2162
2163 if (!mem->fictitious) {
2164 mem->free = TRUE;
2165
2166 color = mem->phys_page & vm_color_mask;
2167 if (queue_empty(&free_list[color])) {
2168 inuse[color] = inuse_list_head;
2169 inuse_list_head = color;
2170 }
2171 queue_enter_first(&free_list[color],
2172 mem,
2173 vm_page_t,
2174 pageq);
2175 pg_count++;
2176 } else {
2177 assert(mem->phys_page == vm_page_fictitious_addr ||
2178 mem->phys_page == vm_page_guard_addr);
2179 vm_page_release_fictitious(mem);
2180 }
2181 mem = nxt;
2182 }
2183 if (pg_count) {
2184 unsigned int avail_free_count;
2185
2186 mutex_lock(&vm_page_queue_free_lock);
2187
2188 color = inuse_list_head;
2189
2190 while( color != -1 ) {
2191 vm_page_t first, last;
2192 vm_page_t first_free;
2193
2194 first = (vm_page_t) queue_first(&free_list[color]);
2195 last = (vm_page_t) queue_last(&free_list[color]);
2196 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2197
2198 if (queue_empty(&vm_page_queue_free[color])) {
2199 queue_last(&vm_page_queue_free[color]) =
2200 (queue_entry_t) last;
2201 } else {
2202 queue_prev(&first_free->pageq) =
2203 (queue_entry_t) last;
2204 }
2205 queue_first(&vm_page_queue_free[color]) =
2206 (queue_entry_t) first;
2207 queue_prev(&first->pageq) =
2208 (queue_entry_t) &vm_page_queue_free[color];
2209 queue_next(&last->pageq) =
2210 (queue_entry_t) first_free;
2211 color = inuse[color];
2212 }
2213
2214 vm_page_free_count += pg_count;
2215 avail_free_count = vm_page_free_count;
2216
2217 while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
2218 vm_page_free_wanted_privileged--;
2219 avail_free_count--;
2220
2221 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2222 }
2223
2224 if ((vm_page_free_wanted > 0) &&
2225 (avail_free_count >= vm_page_free_reserved)) {
2226 unsigned int available_pages;
2227
2228 if (avail_free_count >= vm_page_free_reserved) {
2229 available_pages = (avail_free_count - vm_page_free_reserved);
2230 } else {
2231 available_pages = 0;
2232 }
2233
2234 if (available_pages >= vm_page_free_wanted) {
2235 vm_page_free_wanted = 0;
2236 thread_wakeup((event_t) &vm_page_free_count);
2237 } else {
2238 while (available_pages--) {
2239 vm_page_free_wanted--;
2240 thread_wakeup_one((event_t) &vm_page_free_count);
2241 }
2242 }
2243 }
2244 mutex_unlock(&vm_page_queue_free_lock);
2245
2246#if CONFIG_EMBEDDED
2247 {
2248 int percent_avail;
2249
2250 /*
2251 * Decide if we need to poke the memorystatus notification thread.
2252 */
2253 percent_avail =
2254 (vm_page_active_count + vm_page_inactive_count +
2255 vm_page_speculative_count + vm_page_free_count +
2256 vm_page_purgeable_count ) * 100 /
2257 atop_64(max_mem);
2258 if (percent_avail >= (kern_memorystatus_level + 5)) {
2259 kern_memorystatus_level = percent_avail;
2260 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2261 }
2262 }
2263#endif
2264 }
2265}
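
/*
 * A minimal sketch of how a batch of pages could be handed to
 * vm_page_free_list(): the pages must already be untabled and off the paging
 * queues (or the DEBUG checks above will panic), linked only through
 * pageq.next with pageq.prev left NULL. The helper name "example_free_batch"
 * is hypothetical.
 */
#if 0	/* illustration only -- never compiled */
static void
example_free_batch(
	vm_page_t	*batch,
	int		count)
{
	vm_page_t	head = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < count; i++) {
		batch[i]->pageq.next = (queue_entry_t) head;	/* push onto singly linked list */
		batch[i]->pageq.prev = NULL;			/* vm_page_free_list() asserts this */
		head = batch[i];
	}
	vm_page_lock_queues();		/* required on entry, kept locked across the call */
	vm_page_free_list(head);
	vm_page_unlock_queues();
}
#endif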
2266
2267
2268/*
2269 * vm_page_wire:
2270 *
2271 * Mark this page as wired down by yet
2272 * another map, removing it from paging queues
2273 * as necessary.
2274 *
2275 * The page's object and the page queues must be locked.
2276 */
2277void
2278vm_page_wire(
2279 register vm_page_t mem)
2280{
2281
2282// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2283
2284 VM_PAGE_CHECK(mem);
2285#if DEBUG
2286 if (mem->object)
2287 vm_object_lock_assert_exclusive(mem->object);
2288 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2289#endif
2290 if (mem->wire_count == 0) {
2291 VM_PAGE_QUEUES_REMOVE(mem);
2292 if (!mem->private && !mem->fictitious && !mem->gobbled)
2293 vm_page_wire_count++;
2294 if (mem->gobbled)
2295 vm_page_gobble_count--;
2296 mem->gobbled = FALSE;
2297 if (mem->zero_fill == TRUE) {
2298 mem->zero_fill = FALSE;
2299 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2300 }
2301 /*
2302 * ENCRYPTED SWAP:
2303 * The page could be encrypted, but
2304 * we don't have to decrypt it here
2305 * because we don't guarantee that the
2306 * data is actually valid at this point.
2307 * The page will get decrypted in
2308 * vm_fault_wire() if needed.
2309 */
2310 }
2311 assert(!mem->gobbled);
2312 mem->wire_count++;
2313}
2314
2315/*
2316 * vm_page_gobble:
2317 *
2318 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2319 *
2320 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2321 */
2322void
2323vm_page_gobble(
2324 register vm_page_t mem)
2325{
2326 vm_page_lockspin_queues();
2327 VM_PAGE_CHECK(mem);
2328
2329 assert(!mem->gobbled);
2330 assert(mem->wire_count == 0);
2331
2332 if (!mem->gobbled && mem->wire_count == 0) {
2333 if (!mem->private && !mem->fictitious)
2334 vm_page_wire_count++;
2335 }
2336 vm_page_gobble_count++;
2337 mem->gobbled = TRUE;
2338 vm_page_unlock_queues();
2339}
2340
2341/*
2342 * vm_page_unwire:
2343 *
2344 * Release one wiring of this page, potentially
2345 * enabling it to be paged again.
2346 *
2347 * The page's object and the page queues must be locked.
2348 */
2349void
2350vm_page_unwire(
2351 register vm_page_t mem)
2352{
2353
2354// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2355
2356 VM_PAGE_CHECK(mem);
2357 assert(mem->wire_count > 0);
2358#if DEBUG
2359 if (mem->object)
2360 vm_object_lock_assert_exclusive(mem->object);
2361 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2362#endif
2363 if (--mem->wire_count == 0) {
2364 assert(!mem->private && !mem->fictitious);
2365 vm_page_wire_count--;
2366 assert(!mem->laundry);
2367 assert(mem->object != kernel_object);
2368 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2369 if (!IP_VALID(memory_manager_default) &&
2370 mem->dirty && mem->object->internal &&
2371 (mem->object->purgable == VM_PURGABLE_DENY ||
2372 mem->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2373 queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
2374 vm_page_throttled_count++;
2375 mem->throttled = TRUE;
2376 } else {
2377 queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
2378 vm_page_active_count++;
2379 mem->active = TRUE;
2380 }
2381 mem->reference = TRUE;
2382 }
2383}
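
/*
 * A minimal sketch of a wire/unwire pairing; the helper name
 * "example_pin_page" is hypothetical. Both calls need the page's object and
 * the page queues locked, per the comments above.
 */
#if 0	/* illustration only -- never compiled */
static void
example_pin_page(
	vm_page_t	mem)
{
	vm_object_lock(mem->object);
	vm_page_lockspin_queues();
	vm_page_wire(mem);		/* pulls the page off the paging queues */
	vm_page_unlock_queues();
	vm_object_unlock(mem->object);

	/* ... the page cannot be paged out while it stays wired ... */

	vm_object_lock(mem->object);
	vm_page_lock_queues();
	vm_page_unwire(mem);		/* last unwire re-enters it on the active/throttled queue */
	vm_page_unlock_queues();
	vm_object_unlock(mem->object);
}
#endif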
2384
2385
2386/*
2387 * vm_page_deactivate:
2388 *
2389 * Returns the given page to the inactive list,
2390 * indicating that no physical maps have access
2391 * to this page. [Used by the physical mapping system.]
2392 *
2393 * The page queues must be locked.
2394 */
2395void
2396vm_page_deactivate(
2397 register vm_page_t m)
2398{
2399 boolean_t rapid_age = FALSE;
2400
2401 VM_PAGE_CHECK(m);
2402 assert(m->object != kernel_object);
2403 assert(m->phys_page != vm_page_guard_addr);
2404
2405// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2406#if DEBUG
2407 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2408#endif
2409 /*
2410 * This page is no longer very interesting. If it was
2411 * interesting (active or inactive/referenced), then we
2412 * clear the reference bit and (re)enter it in the
2413 * inactive queue. Note wired pages should not have
2414 * their reference bit cleared.
2415 */
2416 if (m->gobbled) { /* can this happen? */
2417 assert(m->wire_count == 0);
2418
2419 if (!m->private && !m->fictitious)
2420 vm_page_wire_count--;
2421 vm_page_gobble_count--;
2422 m->gobbled = FALSE;
2423 }
2424 if (m->private || (m->wire_count != 0))
2425 return;
2426
2427 if (m->active && m->deactivated == TRUE) {
2428 if (!pmap_is_referenced(m->phys_page))
2429 rapid_age = TRUE;
2430 }
2431 if (rapid_age == FALSE && !m->fictitious && !m->absent)
2432 pmap_clear_reference(m->phys_page);
2433
2434 m->reference = FALSE;
2435 m->deactivated = FALSE;
2436 m->no_cache = FALSE;
2437
2438 if (!m->inactive) {
2439 VM_PAGE_QUEUES_REMOVE(m);
2440
2441 assert(!m->laundry);
2442 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2443
2444 if (!IP_VALID(memory_manager_default) &&
2445 m->dirty && m->object->internal &&
2446 (m->object->purgable == VM_PURGABLE_DENY ||
2447 m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2448 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2449 m->throttled = TRUE;
2450 vm_page_throttled_count++;
2451 } else {
2452 if (rapid_age == TRUE ||
2453 (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
2454 vm_page_speculate(m, FALSE);
2455 vm_page_speculative_recreated++;
2456 return;
2457 } else {
2458 if (m->zero_fill) {
2459 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2460 vm_zf_queue_count++;
2461 } else {
2462 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2463 }
2464 }
2465 m->inactive = TRUE;
2466 if (!m->fictitious) {
2467 vm_page_inactive_count++;
2468 token_new_pagecount++;
2469 }
2470 }
2471 }
2472}
2473
2474/*
2475 * vm_page_activate:
2476 *
2477 * Put the specified page on the active list (if appropriate).
2478 *
2479 * The page queues must be locked.
2480 */
2481
2482void
2483vm_page_activate(
2484 register vm_page_t m)
2485{
2486 VM_PAGE_CHECK(m);
2487#ifdef FIXME_4778297
2488 assert(m->object != kernel_object);
2489#endif
2490 assert(m->phys_page != vm_page_guard_addr);
2491#if DEBUG
2492 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2493#endif
2494 if (m->gobbled) {
2495 assert(m->wire_count == 0);
2496 if (!m->private && !m->fictitious)
2497 vm_page_wire_count--;
2498 vm_page_gobble_count--;
2499 m->gobbled = FALSE;
2500 }
2501 if (m->private)
2502 return;
2503
2504#if DEBUG
2505 if (m->active)
2506 panic("vm_page_activate: already active");
2507#endif
2508
2509 if (m->speculative) {
2510 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2511 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2512 }
2513
2514 VM_PAGE_QUEUES_REMOVE(m);
2515
2516 if (m->wire_count == 0) {
2517 assert(!m->laundry);
2518 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2519 if (!IP_VALID(memory_manager_default) &&
2520 !m->fictitious && m->dirty && m->object->internal &&
2521 (m->object->purgable == VM_PURGABLE_DENY ||
2522 m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
2523 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2524 m->throttled = TRUE;
2525 vm_page_throttled_count++;
2526 } else {
2527 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2528 m->active = TRUE;
2529 if (!m->fictitious)
2530 vm_page_active_count++;
2531 }
2532 m->reference = TRUE;
2533 m->no_cache = FALSE;
2534 }
2535}
2536
2537
2538/*
2539 * vm_page_speculate:
2540 *
2541 * Put the specified page on the speculative list (if appropriate).
2542 *
2543 * The page queues must be locked.
2544 */
2545void
2546vm_page_speculate(
2547 vm_page_t m,
2548 boolean_t new)
2549{
2550 struct vm_speculative_age_q *aq;
2551
2552 VM_PAGE_CHECK(m);
2553 assert(m->object != kernel_object);
2554 assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
2555 assert(m->phys_page != vm_page_guard_addr);
2556 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2557#if DEBUG
2558 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2559#endif
2560 if (m->wire_count == 0) {
2561 mach_timespec_t ts;
2562
2563 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
2564
2565 if (vm_page_speculative_count == 0) {
2566
2567 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2568 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2569
2570 aq = &vm_page_queue_speculative[speculative_age_index];
2571
2572 /*
2573 * set the timer to begin a new group
2574 */
2575 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2576 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2577
2578 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2579 } else {
2580 aq = &vm_page_queue_speculative[speculative_age_index];
2581
2582 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2583
2584 speculative_age_index++;
2585
2586 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2587 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2588 if (speculative_age_index == speculative_steal_index) {
2589 speculative_steal_index = speculative_age_index + 1;
2590
2591 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2592 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2593 }
2594 aq = &vm_page_queue_speculative[speculative_age_index];
2595
2596 if (!queue_empty(&aq->age_q))
2597 vm_page_speculate_ageit(aq);
2598
2599 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2600 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2601
2602 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2603 }
2604 }
2605 enqueue_tail(&aq->age_q, &m->pageq);
2606 m->speculative = TRUE;
2607 vm_page_speculative_count++;
2608
2609 if (new == TRUE) {
2610 m->object->pages_created++;
2611 vm_page_speculative_created++;
2612 }
2613 }
2614}
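
/*
 * Worked example of the age-timestamp arithmetic used above, split out as a
 * hypothetical helper: the millisecond constant is divided into whole seconds
 * plus a nanosecond remainder before being added to the current time.
 */
#if 0	/* illustration only -- never compiled */
static mach_timespec_t
example_ms_to_timespec(
	unsigned int	ms)
{
	mach_timespec_t	ts;

	ts.tv_sec = ms / 1000;					/* whole seconds */
	ts.tv_nsec = (ms % 1000) * 1000 * NSEC_PER_USEC;	/* leftover ms -> usec -> nsec */

	return ts;
}
#endif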
2615
2616
2617/*
2618 * move pages from the specified aging bin to
2619 * the speculative bin that pageout_scan claims from
2620 *
2621 * The page queues must be locked.
2622 */
2623void
2624vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2625{
2626 struct vm_speculative_age_q *sq;
2627 vm_page_t t;
2628
2629 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2630
2631 if (queue_empty(&sq->age_q)) {
2632 sq->age_q.next = aq->age_q.next;
2633 sq->age_q.prev = aq->age_q.prev;
2634
2635 t = (vm_page_t)sq->age_q.next;
2636 t->pageq.prev = &sq->age_q;
2637
2638 t = (vm_page_t)sq->age_q.prev;
2639 t->pageq.next = &sq->age_q;
2640 } else {
2641 t = (vm_page_t)sq->age_q.prev;
2642 t->pageq.next = aq->age_q.next;
2643
2644 t = (vm_page_t)aq->age_q.next;
2645 t->pageq.prev = sq->age_q.prev;
2646
2647 t = (vm_page_t)aq->age_q.prev;
2648 t->pageq.next = &sq->age_q;
2649
2650 sq->age_q.prev = aq->age_q.prev;
2651 }
2652 queue_init(&aq->age_q);
2653}
2654
2655
2656void
2657vm_page_lru(
2658 vm_page_t m)
2659{
2660 VM_PAGE_CHECK(m);
2661 assert(m->object != kernel_object);
2662 assert(m->phys_page != vm_page_guard_addr);
2663
2664#if DEBUG
2665 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2666#endif
2667 if (m->active || m->reference)
2668 return;
2669
2670 if (m->private || (m->wire_count != 0))
2671 return;
2672
2673 m->no_cache = FALSE;
2674
2675 VM_PAGE_QUEUES_REMOVE(m);
2676
2677 assert(!m->laundry);
2678 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2679
2680 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2681 m->inactive = TRUE;
2682
2683 vm_page_inactive_count++;
2684 token_new_pagecount++;
2685}
2686
2687
2688/*
2689 * vm_page_part_zero_fill:
2690 *
2691 * Zero-fill a part of the page.
2692 */
2693void
2694vm_page_part_zero_fill(
2695 vm_page_t m,
2696 vm_offset_t m_pa,
2697 vm_size_t len)
2698{
2699 vm_page_t tmp;
2700
2701 VM_PAGE_CHECK(m);
2702#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2703 pmap_zero_part_page(m->phys_page, m_pa, len);
2704#else
2705 while (1) {
2706 tmp = vm_page_grab();
2707 if (tmp == VM_PAGE_NULL) {
2708 vm_page_wait(THREAD_UNINT);
2709 continue;
2710 }
2711 break;
2712 }
2713 vm_page_zero_fill(tmp);
2714 if(m_pa != 0) {
2715 vm_page_part_copy(m, 0, tmp, 0, m_pa);
2716 }
2717 if((m_pa + len) < PAGE_SIZE) {
2718 vm_page_part_copy(m, m_pa + len, tmp,
2719 m_pa + len, PAGE_SIZE - (m_pa + len));
2720 }
2721 vm_page_copy(tmp,m);
2722 vm_page_lock_queues();
2723 vm_page_free(tmp);
2724 vm_page_unlock_queues();
2725#endif
2726
2727}
2728
2729/*
2730 * vm_page_zero_fill:
2731 *
2732 * Zero-fill the specified page.
2733 */
2734void
2735vm_page_zero_fill(
2736 vm_page_t m)
2737{
2738 XPR(XPR_VM_PAGE,
2739 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2740 (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2741
2742 VM_PAGE_CHECK(m);
2743
2744// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
2745 pmap_zero_page(m->phys_page);
2746}
2747
2748/*
2749 * vm_page_part_copy:
2750 *
2751 * copy part of one page to another
2752 */
2753
2754void
2755vm_page_part_copy(
2756 vm_page_t src_m,
2757 vm_offset_t src_pa,
2758 vm_page_t dst_m,
2759 vm_offset_t dst_pa,
2760 vm_size_t len)
2761{
2762 VM_PAGE_CHECK(src_m);
2763 VM_PAGE_CHECK(dst_m);
2764
2765 pmap_copy_part_page(src_m->phys_page, src_pa,
2766 dst_m->phys_page, dst_pa, len);
2767}
2768
2769/*
2770 * vm_page_copy:
2771 *
2772 * Copy one page to another
2773 *
2774 * ENCRYPTED SWAP:
2775 * The source page should not be encrypted. The caller should
2776 * make sure the page is decrypted first, if necessary.
2777 */
2778
2779int vm_page_copy_cs_validations = 0;
2780int vm_page_copy_cs_tainted = 0;
2781
2782void
2783vm_page_copy(
2784 vm_page_t src_m,
2785 vm_page_t dest_m)
2786{
2787 XPR(XPR_VM_PAGE,
2788 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2789 (integer_t)src_m->object, src_m->offset,
2790 (integer_t)dest_m->object, dest_m->offset,
2791 0);
2792
2793 VM_PAGE_CHECK(src_m);
2794 VM_PAGE_CHECK(dest_m);
2795
2796 /*
2797 * ENCRYPTED SWAP:
2798 * The source page should not be encrypted at this point.
2799 * The destination page will therefore not contain encrypted
2800 * data after the copy.
2801 */
2802 if (src_m->encrypted) {
2803 panic("vm_page_copy: source page %p is encrypted\n", src_m);
2804 }
2805 dest_m->encrypted = FALSE;
2806
2807 if (src_m->object != VM_OBJECT_NULL &&
2808 src_m->object->code_signed &&
2809 !src_m->cs_validated) {
2810 /*
2811 * We're copying a not-yet-validated page from a
2812 * code-signed object.
2813 * Whoever ends up mapping the copy page might care about
2814 * the original page's integrity, so let's validate the
2815 * source page now.
2816 */
2817 vm_page_copy_cs_validations++;
2818 vm_page_validate_cs(src_m);
2819 }
2820 /*
2821 * Propagate the code-signing bits to the copy page.
2822 */
2823 dest_m->cs_validated = src_m->cs_validated;
2824 dest_m->cs_tainted = src_m->cs_tainted;
2825 if (dest_m->cs_tainted) {
2826 assert(dest_m->cs_validated);
2827 vm_page_copy_cs_tainted++;
2828 }
2829
2830 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
2831}
2832
2833#if MACH_ASSERT
2834/*
2835 * Check that the list of pages is ordered by
2836 * ascending physical address and has no holes.
2837 */
2838static int
2839vm_page_verify_contiguous(
2840 vm_page_t pages,
2841 unsigned int npages)
2842{
2843 register vm_page_t m;
2844 unsigned int page_count;
2845 vm_offset_t prev_addr;
2846
2847 prev_addr = pages->phys_page;
2848 page_count = 1;
2849 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2850 if (m->phys_page != prev_addr + 1) {
2851 printf("m %p prev_addr 0x%x, current addr 0x%x\n",
2852 m, prev_addr, m->phys_page);
2853 printf("pages %p page_count %d\n", pages, page_count);
2854 panic("vm_page_verify_contiguous: not contiguous!");
2855 }
2856 prev_addr = m->phys_page;
2857 ++page_count;
2858 }
2859 if (page_count != npages) {
2860 printf("pages %p actual count 0x%x but requested 0x%x\n",
2861 pages, page_count, npages);
2862 panic("vm_page_verify_contiguous: count error");
2863 }
2864 return 1;
2865}
2866#endif /* MACH_ASSERT */
2867
2868
2869#if MACH_ASSERT
2870/*
2871 * Check the free lists for proper length etc.
2872 */
2873static void
2874vm_page_verify_free_lists( void )
2875{
2876 unsigned int color, npages;
2877 vm_page_t m;
2878 vm_page_t prev_m;
2879
2880 npages = 0;
2881
2882 mutex_lock(&vm_page_queue_free_lock);
2883
2884 for( color = 0; color < vm_colors; color++ ) {
2885 prev_m = (vm_page_t) &vm_page_queue_free[color];
2886 queue_iterate(&vm_page_queue_free[color],
2887 m,
2888 vm_page_t,
2889 pageq) {
2890 if ((vm_page_t) m->pageq.prev != prev_m)
2891 panic("vm_page_verify_free_lists: corrupted prev ptr");
2892 if ( ! m->free )
2893 panic("vm_page_verify_free_lists: not free");
2894 if ( ! m->busy )
2895 panic("vm_page_verify_free_lists: not busy");
2896 if ( (m->phys_page & vm_color_mask) != color)
2897 panic("vm_page_verify_free_lists: wrong color");
2898 ++npages;
2899 prev_m = m;
2900 }
2901 }
2902 if (npages != vm_page_free_count)
2903 panic("vm_page_verify_free_lists: npages %u free_count %d",
2904 npages, vm_page_free_count);
2905
2906 mutex_unlock(&vm_page_queue_free_lock);
2907}
2908#endif /* MACH_ASSERT */
2909
2910
2911
2912/*
2913 * CONTIGUOUS PAGE ALLOCATION
2914 * Additional levels of effort:
2915 * + consider pages that are currently 'pmapped'
2916 * this could be expensive since we'd have
2917 * to ask the pmap layer about their state
2918 * + consider dirty pages
2919 * either clean them or
2920 * copy them to other locations...
2921 *
2922 * Find a region large enough to contain at least n pages
2923 * of contiguous physical memory.
2924 *
2925 * This is done by traversing the vm_page_t array in a linear fashion
2926 * we assume that the vm_page_t array has the available physical pages in an
2927 * ordered, ascending list... this is currently true of all our implementations
2928 * and must remain so... there can be 'holes' in the array... we also can
2929 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
2930 * which use to happen via 'vm_page_convert'... that function was no longer
2931 * being called and was removed...
2932 *
2933 * The basic flow consists of stabilizing some of the interesting state of
2934 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
2935 * sweep at the beginning of the array looking for pages that meet our criteria
2936 * for a 'stealable' page... currently we are pretty conservative... if the page
2937 * meets these criteria and is physically contiguous to the previous page in the 'run'
2938 * we keep developing it. If we hit a page that doesn't fit, we reset our state
2939 * and start to develop a new run... if at this point we've already considered
2940 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
2941 * and mutex_pause (which will yield the processor), to keep the latency low w/r
2942 * to other threads trying to acquire free pages (or move pages from q to q),
2943 * and then continue from the spot we left off... we only make 1 pass through the
2944 * array. Once we have a 'run' that is long enough, we'll go into the loop
2945 * which steals the pages from the queues they're currently on... pages on the free
2946 * queue can be stolen directly... pages that are on any of the other queues
2947 * must be removed from the object they are tabled on... this requires taking the
2948 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2949 * or if the state of the page behind the vm_object lock is no longer viable, we'll
2950 * dump the pages we've currently stolen back to the free list, and pick up our
2951 * scan from the point where we aborted the 'current' run.
2952 *
2953 *
2954 * Requirements:
2955 * - neither vm_page_queue nor vm_free_list lock can be held on entry
2956 *
2957 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
2958 *
2959 * Algorithm:
2960 */
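
/*
 * A simplified model (hypothetical, for illustration) of the run-detection
 * sweep described above: given an ascending array of physical page numbers,
 * find the first index that starts a run of 'contig' physically contiguous
 * entries. All of the page-state checks, locking and yielding done by the
 * real scan below are omitted.
 */
#if 0	/* illustration only -- never compiled */
static int
example_find_run(
	ppnum_t		*pnum,
	unsigned int	count,
	unsigned int	contig)
{
	unsigned int	idx, start = 0, npages = 0;
	ppnum_t		prevcontaddr = (ppnum_t) -2;

	for (idx = 0; idx < count && npages < contig; idx++) {
		if (pnum[idx] != prevcontaddr + 1) {
			npages = 1;		/* contiguity broken: start a new run here */
			start = idx;
		} else {
			npages++;		/* page extends the current run */
		}
		prevcontaddr = pnum[idx];
	}
	return (npages == contig) ? (int) start : -1;
}
#endif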
2961
2962#define MAX_CONSIDERED_BEFORE_YIELD 1000
2963
2964
2965#define RESET_STATE_OF_RUN() \
2966 MACRO_BEGIN \
2967 prevcontaddr = -2; \
2968 free_considered = 0; \
2969 substitute_needed = 0; \
2970 npages = 0; \
2971 MACRO_END
2972
2973
2974static vm_page_t
2975vm_page_find_contiguous(
2976 unsigned int contig_pages,
2977 ppnum_t max_pnum,
2978 boolean_t wire)
2979{
2980 vm_page_t m = NULL;
2981 ppnum_t prevcontaddr;
2982 unsigned int npages, considered;
2983 unsigned int page_idx, start_idx;
2984 int free_considered, free_available;
2985 int substitute_needed;
2986#if MACH_ASSERT
2987 uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
2988 int yielded = 0;
2989 int dumped_run = 0;
2990 int stolen_pages = 0;
2991#endif
2992
2993 if (contig_pages == 0)
2994 return VM_PAGE_NULL;
2995
2996#if MACH_ASSERT
2997 vm_page_verify_free_lists();
2998
2999 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3000#endif
3001 vm_page_lock_queues();
3002 mutex_lock(&vm_page_queue_free_lock);
3003
3004 RESET_STATE_OF_RUN();
3005
3006 considered = 0;
3007 free_available = vm_page_free_count - vm_page_free_reserved;
3008
3009 for (page_idx = 0, start_idx = 0;
3010 npages < contig_pages && page_idx < vm_pages_count;
3011 page_idx++) {
3012retry:
3013 m = &vm_pages[page_idx];
3014
3015 if (max_pnum && m->phys_page > max_pnum) {
3016 /* no more low pages... */
3017 break;
3018 }
3019 if (m->phys_page <= vm_lopage_poolend &&
3020 m->phys_page >= vm_lopage_poolstart) {
3021 /*
3022 * don't want to take pages from our
3023 * reserved pool of low memory
3024 * so don't consider it which
3025 * means starting a new run
3026 */
3027 RESET_STATE_OF_RUN();
3028
3029 } else if (m->wire_count || m->gobbled ||
3030 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3031 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3032 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
3033 /*
3034 * page is in a transient state
3035 * or a state we don't want to deal
3036 * with, so don't consider it which
3037 * means starting a new run
3038 */
3039 RESET_STATE_OF_RUN();
3040
3041 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3042 /*
3043 * page needs to be on one of our queues
3044 * in order for it to be stable behind the
3045 * locks we hold at this point...
3046 * if not, don't consider it which
3047 * means starting a new run
3048 */
3049 RESET_STATE_OF_RUN();
3050
3051 } else if (!m->free && (!m->tabled || m->busy)) {
3052 /*
3053 * pages on the free list are always 'busy'
3054 * so we couldn't test for 'busy' in the check
3055 * for the transient states... pages that are
3056 * 'free' are never 'tabled', so we also couldn't
3057 * test for 'tabled'. So we check here to make
3058 * sure that a non-free page is not busy and is
3059 * tabled on an object...
3060 * if not, don't consider it which
3061 * means starting a new run
3062 */
3063 RESET_STATE_OF_RUN();
3064
3065 } else {
3066 if (m->phys_page != prevcontaddr + 1) {
3067 npages = 1;
3068 start_idx = page_idx;
3069 } else {
3070 npages++;
3071 }
3072 prevcontaddr = m->phys_page;
3073
3074 if (m->pmapped || m->dirty)
3075 substitute_needed++;
3076
3077 if (m->free) {
3078 free_considered++;
3079 }
3080 if ((free_considered + substitute_needed) > free_available) {
3081 /*
3082 * if we let this run continue
3083 * we will end up dropping the vm_page_free_count
3084 * below the reserve limit... we need to abort
3085 * this run, but we can at least re-consider this
3086 * page... thus the jump back to 'retry'
3087 */
3088 RESET_STATE_OF_RUN();
3089
3090 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3091 considered++;
3092 goto retry;
3093 }
3094 /*
3095 * free_available == 0
3096 * so can't consider any free pages... if
3097 * we went to retry in this case, we'd
3098 * get stuck looking at the same page
3099 * w/o making any forward progress
3100 * we also want to take this path if we've already
3101 * reached our limit that controls the lock latency
3102 */
3103 }
3104 }
3105 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3106
3107 mutex_unlock(&vm_page_queue_free_lock);
3108 vm_page_unlock_queues();
3109
3110 mutex_pause(0);
3111
3112 vm_page_lock_queues();
3113 mutex_lock(&vm_page_queue_free_lock);
3114
3115 RESET_STATE_OF_RUN();
3116 /*
3117 * reset our free page limit since we
3118 * dropped the lock protecting the vm_page_free_queue
3119 */
3120 free_available = vm_page_free_count - vm_page_free_reserved;
3121 considered = 0;
3122#if MACH_ASSERT
3123 yielded++;
3124#endif
3125 goto retry;
3126 }
3127 considered++;
3128 }
3129 m = VM_PAGE_NULL;
3130
3131 if (npages != contig_pages)
3132 mutex_unlock(&vm_page_queue_free_lock);
3133 else {
3134 vm_page_t m1;
3135 vm_page_t m2;
3136 unsigned int cur_idx;
3137 unsigned int tmp_start_idx;
3138 vm_object_t locked_object = VM_OBJECT_NULL;
3139 boolean_t abort_run = FALSE;
3140
3141 tmp_start_idx = start_idx;
3142
3143 /*
3144 * first pass through to pull the free pages
3145 * off of the free queue so that in case we
3146 * need substitute pages, we won't grab any
3147 * of the free pages in the run... we'll clear
3148 * the 'free' bit in the 2nd pass, and even in
3149 * an abort_run case, we'll collect all of the
3150 * free pages in this run and return them to the free list
3151 */
3152 while (start_idx < page_idx) {
3153
3154 m1 = &vm_pages[start_idx++];
3155
3156 if (m1->free) {
3157 unsigned int color;
3158
3159 color = m1->phys_page & vm_color_mask;
3160 queue_remove(&vm_page_queue_free[color],
3161 m1,
3162 vm_page_t,
3163 pageq);
3164
3165 vm_page_free_count--;
3166 }
3167 }
3168 /*
3169 * adjust global freelist counts
3170 */
3171 if (vm_page_free_count < vm_page_free_count_minimum)
3172 vm_page_free_count_minimum = vm_page_free_count;
3173
3174 /*
3175 * we can drop the free queue lock at this point since
3176 * we've pulled any 'free' candidates off of the list
3177 * we need it dropped so that we can do a vm_page_grab
3178 * when substituting for pmapped/dirty pages
3179 */
3180 mutex_unlock(&vm_page_queue_free_lock);
3181
3182 start_idx = tmp_start_idx;
3183 cur_idx = page_idx - 1;
3184
3185 while (start_idx++ < page_idx) {
3186 /*
3187 * must go through the list from back to front
3188 * so that the page list is created in the
3189 * correct order - low -> high phys addresses
3190 */
3191 m1 = &vm_pages[cur_idx--];
3192
3193 if (m1->free) {
3194 /*
3195 * pages have already been removed from
3196 * the free list in the 1st pass
3197 */
3198 assert(m1->free);
3199 assert(m1->busy);
3200 assert(!m1->wanted);
3201 assert(!m1->laundry);
3202 m1->free = FALSE;
3203
3204 } else {
3205 vm_object_t object;
3206
3207 if (abort_run == TRUE)
3208 continue;
3209
3210 object = m1->object;
3211
3212 if (object != locked_object) {
3213 if (locked_object) {
3214 vm_object_unlock(locked_object);
3215 locked_object = VM_OBJECT_NULL;
3216 }
3217 if (vm_object_lock_try(object))
3218 locked_object = object;
3219 }
3220 if (locked_object == VM_OBJECT_NULL ||
3221 (m1->wire_count || m1->gobbled ||
3222 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3223 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3224 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3225
3226 if (locked_object) {
3227 vm_object_unlock(locked_object);
3228 locked_object = VM_OBJECT_NULL;
3229 }
3230 tmp_start_idx = cur_idx;
3231 abort_run = TRUE;
3232 continue;
3233 }
3234 if (m1->pmapped || m1->dirty) {
3235 int refmod;
3236 vm_object_offset_t offset;
3237
3238 m2 = vm_page_grab();
3239
3240 if (m2 == VM_PAGE_NULL) {
3241 if (locked_object) {
3242 vm_object_unlock(locked_object);
3243 locked_object = VM_OBJECT_NULL;
3244 }
3245 tmp_start_idx = cur_idx;
3246 abort_run = TRUE;
3247 continue;
3248 }
3249 if (m1->pmapped)
3250 refmod = pmap_disconnect(m1->phys_page);
3251 else
3252 refmod = 0;
3253 vm_page_copy(m1, m2);
3254
3255 m2->reference = m1->reference;
3256 m2->dirty = m1->dirty;
3257
3258 if (refmod & VM_MEM_REFERENCED)
3259 m2->reference = TRUE;
3260 if (refmod & VM_MEM_MODIFIED)
3261 m2->dirty = TRUE;
3262 offset = m1->offset;
3263
3264 /*
3265 * completely cleans up the state
3266 * of the page so that it is ready
3267 * to be put onto the free list, or
3268 * for this purpose it looks like it
3269 * just came off of the free list
3270 */
3271 vm_page_free_prepare(m1);
3272
3273 /*
3274 * make sure we clear the ref/mod state
3275 * from the pmap layer... else we risk
3276 * inheriting state from the last time
3277 * this page was used...
3278 */
3279 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3280 /*
3281 * now put the substitute page on the object
3282 */
3283 vm_page_insert_internal(m2, locked_object, offset, TRUE);
3284
3285 if (m2->reference)
3286 vm_page_activate(m2);
3287 else
3288 vm_page_deactivate(m2);
3289
3290 PAGE_WAKEUP_DONE(m2);
3291
3292 } else {
3293 /*
3294 * completely cleans up the state
3295 * of the page so that it is ready
3296 * to be put onto the free list, or
3297 * for this purpose it looks like it
3298 * just came off of the free list
3299 */
3300 vm_page_free_prepare(m1);
3301 }
3302#if MACH_ASSERT
3303 stolen_pages++;
3304#endif
3305 }
3306 m1->pageq.next = (queue_entry_t) m;
3307 m1->pageq.prev = NULL;
3308 m = m1;
3309 }
3310 if (locked_object) {
3311 vm_object_unlock(locked_object);
3312 locked_object = VM_OBJECT_NULL;
3313 }
3314
3315 if (abort_run == TRUE) {
3316 if (m != VM_PAGE_NULL) {
3317 vm_page_free_list(m);
3318 }
3319#if MACH_ASSERT
3320 dumped_run++;
3321#endif
3322 /*
3323 * want the index of the last
3324 * page in this run that was
3325 * successfully 'stolen', so back
3326 * it up 1 for the auto-decrement on use
3327 * and 1 more to bump back over this page
3328 */
3329 page_idx = tmp_start_idx + 2;
3330
3331 if (page_idx >= vm_pages_count)
3332 goto done_scanning;
3333
3334 mutex_lock(&vm_page_queue_free_lock);
3335
3336 RESET_STATE_OF_RUN();
3337
3338 /*
3339 * reset our free page limit since we
3340 * dropped the lock protecting the vm_page_free_queue
3341 */
3342 free_available = vm_page_free_count - vm_page_free_reserved;
3343
3344 goto retry;
3345 }
3346
3347 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
3348
3349 if (wire == TRUE)
3350 m1->wire_count++;
3351 else
3352 m1->gobbled = TRUE;
3353 }
3354 if (wire == FALSE)
3355 vm_page_gobble_count += npages;
3356
3357 /*
3358 * gobbled pages are also counted as wired pages
3359 */
3360 vm_page_wire_count += npages;
3361
3362 assert(vm_page_verify_contiguous(m, npages));
3363 }
3364done_scanning:
3365 vm_page_unlock_queues();
3366
3367#if MACH_ASSERT
3368 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
3369
3370 tv_end_sec -= tv_start_sec;
3371 if (tv_end_usec < tv_start_usec) {
3372 tv_end_sec--;
3373 tv_end_usec += 1000000;
3374 }
3375 tv_end_usec -= tv_start_usec;
3376 if (tv_end_usec >= 1000000) {
3377 tv_end_sec++;
3378 tv_end_usec -= 1000000;
3379 }
3380 printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
3381 contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
3382
3383 vm_page_verify_free_lists();
3384#endif
3385 return m;
3386}
3387
3388/*
3389 * Allocate a list of contiguous, wired pages.
3390 */
3391kern_return_t
3392cpm_allocate(
3393 vm_size_t size,
3394 vm_page_t *list,
3395 ppnum_t max_pnum,
3396 boolean_t wire)
3397{
3398 vm_page_t pages;
3399 unsigned int npages;
3400
3401 if (size % page_size != 0)
3402 return KERN_INVALID_ARGUMENT;
3403
3404 npages = size / page_size;
3405
3406 /*
3407 * Obtain a pointer to a subset of the free
3408 * list large enough to satisfy the request;
3409 * the region will be physically contiguous.
3410 */
3411 pages = vm_page_find_contiguous(npages, max_pnum, wire);
3412
3413 if (pages == VM_PAGE_NULL)
3414 return KERN_NO_SPACE;
3415 /*
3416 * determine need for wakeups
3417 */
3418 if ((vm_page_free_count < vm_page_free_min) ||
3419 ((vm_page_free_count < vm_page_free_target) &&
3420 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
3421 thread_wakeup((event_t) &vm_page_free_wanted);
3422
3423#if CONFIG_EMBEDDED
3424 {
3425 int percent_avail;
3426
3427 /*
3428 * Decide if we need to poke the memorystatus notification thread.
3429 */
3430 percent_avail =
3431 (vm_page_active_count + vm_page_inactive_count +
3432 vm_page_speculative_count + vm_page_free_count +
3433 vm_page_purgeable_count ) * 100 /
3434 atop_64(max_mem);
3435 if (percent_avail <= (kern_memorystatus_level - 5)) {
3436 kern_memorystatus_level = percent_avail;
3437 thread_wakeup((event_t)&kern_memorystatus_wakeup);
3438 }
3439 }
3440#endif
3441 /*
3442 * The CPM pages should now be available and
3443 * ordered by ascending physical address.
3444 */
3445 assert(vm_page_verify_contiguous(pages, npages));
3446
3447 *list = pages;
3448 return KERN_SUCCESS;
3449}
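
/*
 * A minimal sketch of the cpm_allocate() calling convention; the helper name
 * "example_grab_contiguous" is hypothetical. On success the returned pages
 * are wired (when 'wire' is TRUE), linked via NEXT_PAGE() and ordered by
 * ascending physical address, as asserted above.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_grab_contiguous(
	vm_size_t	size,		/* must be a multiple of page_size */
	ppnum_t		max_pnum,	/* 0 for no limit, else highest acceptable page */
	vm_page_t	*pages)
{
	kern_return_t	kr;
	vm_page_t	m;
	unsigned int	n = 0;

	kr = cpm_allocate(size, pages, max_pnum, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;		/* KERN_NO_SPACE: no suitable run was found */

	for (m = *pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
		n++;			/* each page is physically one past its predecessor */
	assert(n == size / page_size);

	return KERN_SUCCESS;
}
#endif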
3450
3451
3452#include <mach_vm_debug.h>
3453#if MACH_VM_DEBUG
3454
3455#include <mach_debug/hash_info.h>
3456#include <vm/vm_debug.h>
3457
3458/*
3459 * Routine: vm_page_info
3460 * Purpose:
3461 * Return information about the global VP table.
3462 * Fills the buffer with as much information as possible
3463 * and returns the desired size of the buffer.
3464 * Conditions:
3465 * Nothing locked. The caller should provide
3466 * possibly-pageable memory.
3467 */
3468
3469unsigned int
3470vm_page_info(
3471 hash_info_bucket_t *info,
3472 unsigned int count)
3473{
3474 unsigned int i;
3475
3476 if (vm_page_bucket_count < count)
3477 count = vm_page_bucket_count;
3478
3479 for (i = 0; i < count; i++) {
3480 vm_page_bucket_t *bucket = &vm_page_buckets[i];
3481 unsigned int bucket_count = 0;
3482 vm_page_t m;
3483
3484 simple_lock(&vm_page_bucket_lock);
3485 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
3486 bucket_count++;
3487 simple_unlock(&vm_page_bucket_lock);
3488
3489 /* don't touch pageable memory while holding locks */
3490 info[i].hib_count = bucket_count;
3491 }
3492
3493 return vm_page_bucket_count;
3494}
3495#endif /* MACH_VM_DEBUG */
3496
3497#include <mach_kdb.h>
3498#if MACH_KDB
3499
3500#include <ddb/db_output.h>
3501#include <vm/vm_print.h>
3502#define printf kdbprintf
3503
3504/*
3505 * Routine: vm_page_print [exported]
3506 */
3507void
3508vm_page_print(
3509 db_addr_t db_addr)
3510{
3511 vm_page_t p;
3512
3513 p = (vm_page_t) (long) db_addr;
3514
3515 iprintf("page 0x%x\n", p);
3516
3517 db_indent += 2;
3518
3519 iprintf("object=0x%x", p->object);
3520 printf(", offset=0x%x", p->offset);
3521 printf(", wire_count=%d", p->wire_count);
3522
3523 iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
3524 (p->inactive ? "" : "!"),
3525 (p->active ? "" : "!"),
3526 (p->throttled ? "" : "!"),
3527 (p->gobbled ? "" : "!"),
3528 (p->laundry ? "" : "!"),
3529 (p->free ? "" : "!"),
3530 (p->reference ? "" : "!"),
3531 (p->encrypted ? "" : "!"));
3532 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
3533 (p->busy ? "" : "!"),
3534 (p->wanted ? "" : "!"),
3535 (p->tabled ? "" : "!"),
3536 (p->fictitious ? "" : "!"),
3537 (p->private ? "" : "!"),
3538 (p->precious ? "" : "!"));
3539 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
3540 (p->absent ? "" : "!"),
3541 (p->error ? "" : "!"),
3542 (p->dirty ? "" : "!"),
3543 (p->cleaning ? "" : "!"),
3544 (p->pageout ? "" : "!"),
3545 (p->clustered ? "" : "!"));
3546 iprintf("%soverwriting, %srestart, %sunusual\n",
3547 (p->overwriting ? "" : "!"),
3548 (p->restart ? "" : "!"),
3549 (p->unusual ? "" : "!"));
3550
3551 iprintf("phys_page=0x%x", p->phys_page);
3552
3553 db_indent -= 2;
3554}
3555#endif /* MACH_KDB */