/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <ppc/mappings.h>		/* (BRINGUP) */
#include <pexpert/pexpert.h>		/* (BRINGUP) */
#include <vm/vm_protos.h>
/*
 *	Variables used to indicate the relative age of pages in the
 *	inactive list.
 */
unsigned int	vm_page_ticket_roll = 0;
unsigned int	vm_page_ticket = 0;

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */
vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
	vm_page_t	pages;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
decl_simple_lock_data(,vm_page_bucket_lock)

vm_page_t
vm_page_lookup_nohint(vm_object_t object, vm_object_offset_t offset);
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;

/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

/*
 *	Resident pages that represent real memory
 *	are allocated from a free list.
 */
vm_page_t	vm_page_queue_free;
vm_page_t	vm_page_queue_fictitious;
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */

/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
decl_mutex_data(,vm_page_alloc_lock)
unsigned int io_throttle_zero_fill;

/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;	/* # of pages purgeable now */
uint64_t	vm_page_purged_count = 0;	/* total count of purged pages */

/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttled_count = 0;

/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */
boolean_t vm_page_deactivate_hint = TRUE;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_mask = page_size - 1;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
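
/*
 * Illustrative sketch (not compiled): the relationship vm_set_page_size()
 * establishes, shown for a hypothetical 4 KB page.  page_mask is
 * page_size - 1 and page_shift is log2(page_size), so page rounding and
 * truncation can be done with simple masks and shifts.
 */
#if 0
static void
vm_set_page_size_example(void)
{
	assert(page_size  == 4096);			/* hypothetical configuration */
	assert(page_mask  == 0xFFF);			/* page_size - 1 */
	assert(page_shift == 12);			/* 1U << 12 == 4096 */
	assert((0x12345 & ~page_mask) == 0x12000);	/* truncate to a page boundary */
}
#endif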
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	vm_size_t		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;

	m->reference = FALSE;
	m->dump_cleaning = FALSE;
	m->list_req_pending = FALSE;
	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->lock_supplied = FALSE;
	m->zero_fill = FALSE;
	m->encrypted = FALSE;

	m->phys_page = 0;			/* reset later */

	m->page_lock = VM_PROT_NONE;
	m->unlock_request = VM_PROT_NONE;
	m->page_error = KERN_SUCCESS;

	/*
	 *	Initialize the page queues.
	 */

	mutex_init(&vm_page_queue_free_lock, 0);
	mutex_init(&vm_page_queue_lock, 0);

	vm_page_queue_free = VM_PAGE_NULL;
	vm_page_queue_fictitious = VM_PAGE_NULL;
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_zf);

	vm_page_free_wanted = 0;
	/*
	 *	Steal memory for the map and zone subsystems.
	 */

	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	simple_lock_init(&vm_page_bucket_lock, 0);

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *		hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to ensure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");

	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->pages = VM_PAGE_NULL;
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved.  At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	vm_page_wire_count = atop_64(max_mem) - vm_page_free_count;	/* initial value */

	printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
	vm_page_free_count_minimum = vm_page_free_count;

	simple_lock_init(&vm_paging_lock, 0);
}
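
/*
 * Illustrative sketch (not compiled): the sizing loop in vm_page_bootstrap()
 * picks the first power of two that reaches the number of physical pages,
 * which is what lets vm_page_hash_mask be derived as count - 1.  The page
 * count below is hypothetical.
 */
#if 0
static void
vm_page_bucket_sizing_example(void)
{
	unsigned int npages = 100000;		/* hypothetical pmap_free_pages() */
	unsigned int count = 1;

	while (count < npages)
		count <<= 1;			/* same loop as vm_page_bootstrap() */
	assert(count == 131072);		/* 2^17, first power of two >= 100000 */
	assert((count & (count - 1)) == 0);	/* power of two, so count - 1 is a mask */
}
#endif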
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t addr, vaddr;
	ppnum_t	phys_page;

	/*
	 *	We round the size up to a multiple of the pointer size.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourselves.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {
		if (!pmap_next_page(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}
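
/*
 * Illustrative sketch (not compiled): the size rounding at the top of
 * pmap_steal_memory() on a machine with 8-byte pointers.  Sizes already
 * a multiple of 8 are unchanged; anything else rounds up to the next
 * multiple of 8.
 */
#if 0
static void
pmap_steal_memory_round_example(void)
{
	vm_size_t size = 13;			/* hypothetical request */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
	assert(size == 16);
}
#endif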
void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int i, npages, pages_initialized, fill, fillval;
	vm_page_t pages;
	ppnum_t phys_page;
	addr64_t tmpaddr;

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*pages)));	/* Figure how many pages fit once each also gets a vm_page_t */

	pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);

	/*
	 *	Initialize the page frames.
	 */

	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		vm_page_init(&pages[i], phys_page);
		pages_initialized++;
	}

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */

	/*
	 * Check if we want to initialize pages to a known value
	 */

	fill = 0;							/* Assume no fill */
	if (PE_parse_boot_arg("fill", &fillval)) fill = 1;		/* Set fill */

	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
		vm_page_release(&pages[i - 1]);
	}
599 vm_page_t xx
, xxo
, xxl
;
602 j
= 0; /* (BRINGUP) */
605 for(xx
= vm_page_queue_free
; xx
; xxl
= xx
, xx
= xx
->pageq
.next
) { /* (BRINGUP) */
607 if(j
> vm_page_free_count
) { /* (BRINGUP) */
608 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx
, xxl
);
611 l
= vm_page_free_count
- j
; /* (BRINGUP) */
612 k
= 0; /* (BRINGUP) */
614 if(((j
- 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j
, vm_page_free_count
);
616 for(xxo
= xx
->pageq
.next
; xxo
; xxo
= xxo
->pageq
.next
) { /* (BRINGUP) */
618 if(k
> l
) panic("pmap_startup: too many in secondary check %d %d\n", k
, l
);
619 if((xx
->phys_page
& 0xFFFFFFFF) == (xxo
->phys_page
& 0xFFFFFFFF)) { /* (BRINGUP) */
620 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx
, xxo
);
625 if(j
!= vm_page_free_count
) { /* (BRINGUP) */
626 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j
, vm_page_free_count
);
	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page_32(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);

	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

	mutex_init(&vm_page_alloc_lock, 0);
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious())
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		vm_page_init(m, phys_page);
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
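
/*
 * Illustrative sketch (not compiled): because vm_page_bucket_count is a
 * power of 2, vm_page_hash_mask is a run of low-order one bits, and the
 * final "& vm_page_hash_mask" folds the mixed key into a valid bucket
 * index.  The helper below is hypothetical; the real code simply indexes
 * vm_page_buckets[vm_page_hash(object, offset)].
 */
#if 0
static vm_page_bucket_t *
vm_page_hash_example(vm_object_t object, vm_object_offset_t offset)
{
	unsigned int idx = vm_page_hash(object, offset);	/* already masked */

	assert(idx < vm_page_bucket_count);
	return &vm_page_buckets[idx];
}
#endif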
716 * vm_page_insert: [ internal use only ]
718 * Inserts the given mem entry into the object/object-page
719 * table and object list.
721 * The object must be locked.
726 register vm_page_t mem
,
727 register vm_object_t object
,
728 register vm_object_offset_t offset
)
730 register vm_page_bucket_t
*bucket
;
733 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
734 (integer_t
)object
, (integer_t
)offset
, (integer_t
)mem
, 0,0);
738 _mutex_assert(&object
->Lock
, MA_OWNED
);
740 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
741 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
742 "already in (obj=%p,off=0x%llx)",
743 mem
, object
, offset
, mem
->object
, mem
->offset
);
745 assert(!object
->internal
|| offset
< object
->size
);
747 /* only insert "pageout" pages into "pageout" objects,
748 * and normal pages into normal objects */
749 assert(object
->pageout
== mem
->pageout
);
751 assert(vm_page_lookup(object
, offset
) == VM_PAGE_NULL
);
754 * Record the object/offset pair in this page
757 mem
->object
= object
;
758 mem
->offset
= offset
;
761 * Insert it into the object_object/offset hash table
764 bucket
= &vm_page_buckets
[vm_page_hash(object
, offset
)];
765 simple_lock(&vm_page_bucket_lock
);
766 mem
->next
= bucket
->pages
;
768 #if MACH_PAGE_HASH_STATS
769 if (++bucket
->cur_count
> bucket
->hi_count
)
770 bucket
->hi_count
= bucket
->cur_count
;
771 #endif /* MACH_PAGE_HASH_STATS */
772 simple_unlock(&vm_page_bucket_lock
);
775 * Now link into the object's list of backed pages.
778 VM_PAGE_INSERT(mem
, object
);
782 * Show that the object has one more resident page.
785 object
->resident_page_count
++;
787 if (object
->purgable
== VM_OBJECT_PURGABLE_VOLATILE
||
788 object
->purgable
== VM_OBJECT_PURGABLE_EMPTY
) {
789 vm_page_lock_queues();
790 vm_page_purgeable_count
++;
791 vm_page_unlock_queues();
798 * Exactly like vm_page_insert, except that we first
799 * remove any existing page at the given offset in object.
801 * The object and page queues must be locked.
806 register vm_page_t mem
,
807 register vm_object_t object
,
808 register vm_object_offset_t offset
)
810 register vm_page_bucket_t
*bucket
;
814 _mutex_assert(&object
->Lock
, MA_OWNED
);
815 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
817 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
818 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
819 "already in (obj=%p,off=0x%llx)",
820 mem
, object
, offset
, mem
->object
, mem
->offset
);
823 * Record the object/offset pair in this page
826 mem
->object
= object
;
827 mem
->offset
= offset
;
830 * Insert it into the object_object/offset hash table,
831 * replacing any page that might have been there.
834 bucket
= &vm_page_buckets
[vm_page_hash(object
, offset
)];
835 simple_lock(&vm_page_bucket_lock
);
837 vm_page_t
*mp
= &bucket
->pages
;
838 register vm_page_t m
= *mp
;
840 if (m
->object
== object
&& m
->offset
== offset
) {
842 * Remove page from bucket and from object,
843 * and return it to the free list.
848 m
->object
= VM_OBJECT_NULL
;
849 m
->offset
= (vm_object_offset_t
) -1;
850 object
->resident_page_count
--;
852 if (object
->purgable
== VM_OBJECT_PURGABLE_VOLATILE
||
853 object
->purgable
== VM_OBJECT_PURGABLE_EMPTY
) {
854 assert(vm_page_purgeable_count
> 0);
855 vm_page_purgeable_count
--;
859 * Return page to the free list.
860 * Note the page is not tabled now, so this
861 * won't self-deadlock on the bucket lock.
869 mem
->next
= bucket
->pages
;
871 mem
->next
= VM_PAGE_NULL
;
874 simple_unlock(&vm_page_bucket_lock
);
877 * Now link into the object's list of backed pages.
880 VM_PAGE_INSERT(mem
, object
);
884 * And show that the object has one more resident
888 object
->resident_page_count
++;
890 if (object
->purgable
== VM_OBJECT_PURGABLE_VOLATILE
||
891 object
->purgable
== VM_OBJECT_PURGABLE_EMPTY
) {
892 vm_page_purgeable_count
++;
897 * vm_page_remove: [ internal use only ]
899 * Removes the given mem entry from the object/offset-page
900 * table and the object page list.
902 * The object and page queues must be locked.
907 register vm_page_t mem
)
909 register vm_page_bucket_t
*bucket
;
910 register vm_page_t
this;
913 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
914 (integer_t
)mem
->object
, (integer_t
)mem
->offset
,
915 (integer_t
)mem
, 0,0);
917 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
918 _mutex_assert(&mem
->object
->Lock
, MA_OWNED
);
921 assert(!mem
->cleaning
);
926 * Remove from the object_object/offset hash table
929 bucket
= &vm_page_buckets
[vm_page_hash(mem
->object
, mem
->offset
)];
930 simple_lock(&vm_page_bucket_lock
);
931 if ((this = bucket
->pages
) == mem
) {
932 /* optimize for common case */
934 bucket
->pages
= mem
->next
;
936 register vm_page_t
*prev
;
938 for (prev
= &this->next
;
939 (this = *prev
) != mem
;
944 #if MACH_PAGE_HASH_STATS
946 #endif /* MACH_PAGE_HASH_STATS */
947 simple_unlock(&vm_page_bucket_lock
);
950 * Now remove from the object's list of backed pages.
956 * And show that the object has one fewer resident
960 mem
->object
->resident_page_count
--;
962 if (mem
->object
->purgable
== VM_OBJECT_PURGABLE_VOLATILE
||
963 mem
->object
->purgable
== VM_OBJECT_PURGABLE_EMPTY
) {
964 assert(vm_page_purgeable_count
> 0);
965 vm_page_purgeable_count
--;
969 mem
->object
= VM_OBJECT_NULL
;
970 mem
->offset
= (vm_object_offset_t
) -1;
976 * Returns the page associated with the object/offset
977 * pair specified; if none is found, VM_PAGE_NULL is returned.
979 * The object must be locked. No side effects.
982 unsigned long vm_page_lookup_hint
= 0;
983 unsigned long vm_page_lookup_hint_next
= 0;
984 unsigned long vm_page_lookup_hint_prev
= 0;
985 unsigned long vm_page_lookup_hint_miss
= 0;
989 register vm_object_t object
,
990 register vm_object_offset_t offset
)
992 register vm_page_t mem
;
993 register vm_page_bucket_t
*bucket
;
996 _mutex_assert(&object
->Lock
, MA_OWNED
);
999 mem
= object
->memq_hint
;
1000 if (mem
!= VM_PAGE_NULL
) {
1001 assert(mem
->object
== object
);
1002 if (mem
->offset
== offset
) {
1003 vm_page_lookup_hint
++;
1006 qe
= queue_next(&mem
->listq
);
1007 if (! queue_end(&object
->memq
, qe
)) {
1008 vm_page_t next_page
;
1010 next_page
= (vm_page_t
) qe
;
1011 assert(next_page
->object
== object
);
1012 if (next_page
->offset
== offset
) {
1013 vm_page_lookup_hint_next
++;
1014 object
->memq_hint
= next_page
; /* new hint */
1018 qe
= queue_prev(&mem
->listq
);
1019 if (! queue_end(&object
->memq
, qe
)) {
1020 vm_page_t prev_page
;
1022 prev_page
= (vm_page_t
) qe
;
1023 assert(prev_page
->object
== object
);
1024 if (prev_page
->offset
== offset
) {
1025 vm_page_lookup_hint_prev
++;
1026 object
->memq_hint
= prev_page
; /* new hint */
1033 * Search the hash table for this object/offset pair
1036 bucket
= &vm_page_buckets
[vm_page_hash(object
, offset
)];
1038 simple_lock(&vm_page_bucket_lock
);
1039 for (mem
= bucket
->pages
; mem
!= VM_PAGE_NULL
; mem
= mem
->next
) {
1041 if ((mem
->object
== object
) && (mem
->offset
== offset
))
1044 simple_unlock(&vm_page_bucket_lock
);
1046 if (mem
!= VM_PAGE_NULL
) {
1047 if (object
->memq_hint
!= VM_PAGE_NULL
) {
1048 vm_page_lookup_hint_miss
++;
1050 assert(mem
->object
== object
);
1051 object
->memq_hint
= mem
;
1059 vm_page_lookup_nohint(
1061 vm_object_offset_t offset
)
1063 register vm_page_t mem
;
1064 register vm_page_bucket_t
*bucket
;
1067 _mutex_assert(&object
->Lock
, MA_OWNED
);
1070 * Search the hash table for this object/offset pair
1073 bucket
= &vm_page_buckets
[vm_page_hash(object
, offset
)];
1075 simple_lock(&vm_page_bucket_lock
);
1076 for (mem
= bucket
->pages
; mem
!= VM_PAGE_NULL
; mem
= mem
->next
) {
1078 if ((mem
->object
== object
) && (mem
->offset
== offset
))
1081 simple_unlock(&vm_page_bucket_lock
);
1089 * Move the given memory entry from its
1090 * current object to the specified target object/offset.
1092 * The object must be locked.
1096 register vm_page_t mem
,
1097 register vm_object_t new_object
,
1098 vm_object_offset_t new_offset
)
1100 assert(mem
->object
!= new_object
);
1103 * The encryption key is based on the page's memory object
1104 * (aka "pager") and paging offset. Moving the page to
1105 * another VM object changes its "pager" and "paging_offset"
1106 * so it has to be decrypted first.
1108 if (mem
->encrypted
) {
1109 panic("vm_page_rename: page %p is encrypted\n", mem
);
1112 * Changes to mem->object require the page lock because
1113 * the pageout daemon uses that lock to get the object.
1117 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1118 (integer_t
)new_object
, (integer_t
)new_offset
,
1119 (integer_t
)mem
, 0,0);
1121 vm_page_lock_queues();
1122 vm_page_remove(mem
);
1123 vm_page_insert(mem
, new_object
, new_offset
);
1124 vm_page_unlock_queues();
1130 * Initialize the fields in a new page.
1131 * This takes a structure with random values and initializes it
1132 * so that it can be given to vm_page_release or vm_page_insert.
1140 *mem
= vm_page_template
;
1141 mem
->phys_page
= phys_page
;
1145 * vm_page_grab_fictitious:
1147 * Remove a fictitious page from the free list.
1148 * Returns VM_PAGE_NULL if there are no free pages.
1150 int c_vm_page_grab_fictitious
= 0;
1151 int c_vm_page_release_fictitious
= 0;
1152 int c_vm_page_more_fictitious
= 0;
1155 vm_page_grab_fictitious(void)
1157 register vm_page_t m
;
1159 m
= (vm_page_t
)zget(vm_page_zone
);
1161 vm_page_init(m
, vm_page_fictitious_addr
);
1162 m
->fictitious
= TRUE
;
1165 c_vm_page_grab_fictitious
++;
1170 * vm_page_release_fictitious:
1172 * Release a fictitious page to the free list.
1176 vm_page_release_fictitious(
1177 register vm_page_t m
)
1181 assert(m
->fictitious
);
1182 assert(m
->phys_page
== vm_page_fictitious_addr
);
1184 c_vm_page_release_fictitious
++;
1187 panic("vm_page_release_fictitious");
1190 zfree(vm_page_zone
, m
);
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the free list.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone. This also allows us to initialize the vm_page_t's
 *	   on the way into the zone, so that zget() always returns
 *	   an initialized structure. The zone free element pointer
 *	   and the free page pointer are both the first item in the
 *	   vm_page_t.
 *	4. By having the pages in the zone pre-initialized, we need
 *	   not keep 2 levels of lists. The garbage collector simply
 *	   scans our list, and reduces physical memory usage as it
 *	   sees fit.
 */

void vm_page_more_fictitious(void)
{
	register vm_page_t m;
	kern_return_t retval;

	c_vm_page_more_fictitious++;
1231 * Allocate a single page from the zone_map. Do not wait if no physical
1232 * pages are immediately available, and do not zero the space. We need
1233 * our own blocking lock here to prevent having multiple,
1234 * simultaneous requests from piling up on the zone_map lock. Exactly
1235 * one (of our) threads should be potentially waiting on the map lock.
1236 * If winner is not vm-privileged, then the page allocation will fail,
1237 * and it will temporarily block here in the vm_page_wait().
1239 mutex_lock(&vm_page_alloc_lock
);
1241 * If another thread allocated space, just bail out now.
1243 if (zone_free_count(vm_page_zone
) > 5) {
1245 * The number "5" is a small number that is larger than the
1246 * number of fictitious pages that any single caller will
1247 * attempt to allocate. Otherwise, a thread will attempt to
1248 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1249 * release all of the resources and locks already acquired,
1250 * and then call this routine. This routine finds the pages
1251 * that the caller released, so fails to allocate new space.
1252 * The process repeats infinitely. The largest known number
1253 * of fictitious pages required in this manner is 2. 5 is
1254 * simply a somewhat larger number.
1256 mutex_unlock(&vm_page_alloc_lock
);
1260 retval
= kernel_memory_allocate(zone_map
,
1261 &addr
, PAGE_SIZE
, VM_PROT_ALL
,
1262 KMA_KOBJECT
|KMA_NOPAGEWAIT
);
1263 if (retval
!= KERN_SUCCESS
) {
1265 * No page was available. Tell the pageout daemon, drop the
1266 * lock to give another thread a chance at it, and
1267 * wait for the pageout daemon to make progress.
1269 mutex_unlock(&vm_page_alloc_lock
);
1270 vm_page_wait(THREAD_UNINT
);
1274 * Initialize as many vm_page_t's as will fit on this page. This
1275 * depends on the zone code disturbing ONLY the first item of
1276 * each zone element.
1278 m
= (vm_page_t
)addr
;
1279 for (i
= PAGE_SIZE
/sizeof(struct vm_page
); i
> 0; i
--) {
1280 vm_page_init(m
, vm_page_fictitious_addr
);
1281 m
->fictitious
= TRUE
;
1284 zcram(vm_page_zone
, (void *) addr
, PAGE_SIZE
);
1285 mutex_unlock(&vm_page_alloc_lock
);
1291 * Attempt to convert a fictitious page into a real page.
1296 register vm_page_t m
)
1298 register vm_page_t real_m
;
1301 assert(m
->fictitious
);
1304 real_m
= vm_page_grab();
1305 if (real_m
== VM_PAGE_NULL
)
1308 m
->phys_page
= real_m
->phys_page
;
1309 m
->fictitious
= FALSE
;
1312 vm_page_lock_queues();
1314 vm_page_active_count
++;
1315 else if (m
->inactive
)
1316 vm_page_inactive_count
++;
1317 vm_page_unlock_queues();
1319 real_m
->phys_page
= vm_page_fictitious_addr
;
1320 real_m
->fictitious
= TRUE
;
1322 vm_page_release_fictitious(real_m
);
1329 * Return true if it is not likely that a non-vm_privileged thread
1330 * can get memory without blocking. Advisory only, since the
1331 * situation may change under us.
1336 /* No locking, at worst we will fib. */
1337 return( vm_page_free_count
< vm_page_free_reserved
);
1343 * Remove a page from the free list.
1344 * Returns VM_PAGE_NULL if the free list is too small.
1347 unsigned long vm_page_grab_count
= 0; /* measure demand */
1352 register vm_page_t mem
;
1354 mutex_lock(&vm_page_queue_free_lock
);
1355 vm_page_grab_count
++;
1358 * Optionally produce warnings if the wire or gobble
1359 * counts exceed some threshold.
1361 if (vm_page_wire_count_warning
> 0
1362 && vm_page_wire_count
>= vm_page_wire_count_warning
) {
1363 printf("mk: vm_page_grab(): high wired page count of %d\n",
1364 vm_page_wire_count
);
1365 assert(vm_page_wire_count
< vm_page_wire_count_warning
);
1367 if (vm_page_gobble_count_warning
> 0
1368 && vm_page_gobble_count
>= vm_page_gobble_count_warning
) {
1369 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1370 vm_page_gobble_count
);
1371 assert(vm_page_gobble_count
< vm_page_gobble_count_warning
);
1375 * Only let privileged threads (involved in pageout)
1376 * dip into the reserved pool.
1379 if ((vm_page_free_count
< vm_page_free_reserved
) &&
1380 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
1381 mutex_unlock(&vm_page_queue_free_lock
);
1383 goto wakeup_pageout
;
1386 while (vm_page_queue_free
== VM_PAGE_NULL
) {
1387 mutex_unlock(&vm_page_queue_free_lock
);
1389 mutex_lock(&vm_page_queue_free_lock
);
1392 if (--vm_page_free_count
< vm_page_free_count_minimum
)
1393 vm_page_free_count_minimum
= vm_page_free_count
;
1394 mem
= vm_page_queue_free
;
1395 vm_page_queue_free
= (vm_page_t
) mem
->pageq
.next
;
1396 mem
->pageq
.next
= NULL
;
1397 mem
->pageq
.prev
= NULL
;
1398 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
1399 assert(mem
->tabled
== FALSE
);
1400 assert(mem
->object
== VM_OBJECT_NULL
);
1401 assert(!mem
->laundry
);
1403 mem
->no_isync
= TRUE
;
1404 mutex_unlock(&vm_page_queue_free_lock
);
1406 assert(pmap_verify_free(mem
->phys_page
));
1409 * Decide if we should poke the pageout daemon.
1410 * We do this if the free count is less than the low
1411 * water mark, or if the free count is less than the high
1412 * water mark (but above the low water mark) and the inactive
1413 * count is less than its target.
1415 * We don't have the counts locked ... if they change a little,
1416 * it doesn't really matter.
1420 if ((vm_page_free_count
< vm_page_free_min
) ||
1421 ((vm_page_free_count
< vm_page_free_target
) &&
1422 (vm_page_inactive_count
< vm_page_inactive_target
)))
1423 thread_wakeup((event_t
) &vm_page_free_wanted
);
1425 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1433 * Return a page to the free list.
1438 register vm_page_t mem
)
1442 unsigned int pindex
;
1443 phys_entry
*physent
;
1445 physent
= mapping_phys_lookup(mem
->phys_page
, &pindex
); /* (BRINGUP) */
1446 if(physent
->ppLink
& ppN
) { /* (BRINGUP) */
1447 panic("vm_page_release: already released - %08X %08X\n", mem
, mem
->phys_page
);
1449 physent
->ppLink
= physent
->ppLink
| ppN
; /* (BRINGUP) */
1451 assert(!mem
->private && !mem
->fictitious
);
1453 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1455 mutex_lock(&vm_page_queue_free_lock
);
1458 panic("vm_page_release");
1461 assert(!mem
->laundry
);
1462 assert(mem
->object
== VM_OBJECT_NULL
);
1463 assert(mem
->pageq
.next
== NULL
&&
1464 mem
->pageq
.prev
== NULL
);
1465 mem
->pageq
.next
= (queue_entry_t
) vm_page_queue_free
;
1466 vm_page_queue_free
= mem
;
1467 vm_page_free_count
++;
	/*
	 *	Check if we should wake up someone waiting for a page.
	 *	But don't bother waking them unless they can allocate.
	 *
	 *	We wakeup only one thread, to prevent starvation.
	 *	Because the scheduling system handles wait queues FIFO,
	 *	if we wakeup all waiting threads, one greedy thread
	 *	can starve multiple niceguy threads.  When the threads
	 *	all wakeup, the greedy thread runs first, grabs the page,
	 *	and waits for another page.  It will be the first to run
	 *	when the next page is freed.
	 *
	 *	However, there is a slight danger here.
	 *	The thread we wake might not use the free page.
	 *	Then the other threads could wait indefinitely
	 *	while the page goes unused.  To forestall this,
	 *	the pageout daemon will keep making free pages
	 *	as long as vm_page_free_wanted is non-zero.
	 */
1489 if ((vm_page_free_wanted
> 0) &&
1490 (vm_page_free_count
>= vm_page_free_reserved
)) {
1491 vm_page_free_wanted
--;
1492 thread_wakeup_one((event_t
) &vm_page_free_count
);
1495 mutex_unlock(&vm_page_queue_free_lock
);
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */

boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;

	mutex_lock(&vm_page_queue_free_lock);
	if (vm_page_free_count < vm_page_free_target) {
		if (vm_page_free_wanted++ == 0)
			need_wakeup = 1;
		wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		mutex_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);

		return(wait_result == THREAD_AWAKENED);
	} else {
		mutex_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}
1547 * Allocate and return a memory cell associated
1548 * with this VM object/offset pair.
1550 * Object must be locked.
1556 vm_object_offset_t offset
)
1558 register vm_page_t mem
;
1561 _mutex_assert(&object
->Lock
, MA_OWNED
);
1563 mem
= vm_page_grab();
1564 if (mem
== VM_PAGE_NULL
)
1565 return VM_PAGE_NULL
;
1567 vm_page_insert(mem
, object
, offset
);
1572 counter(unsigned int c_laundry_pages_freed
= 0;)
1574 int vm_pagein_cluster_unused
= 0;
1575 boolean_t vm_page_free_verify
= TRUE
;
1579 * Returns the given page to the free list,
1580 * disassociating it with any VM object.
1582 * Object and page queues must be locked prior to entry.
1586 register vm_page_t mem
)
1588 vm_object_t object
= mem
->object
;
1591 assert(!mem
->cleaning
);
1592 assert(!mem
->pageout
);
1593 if (vm_page_free_verify
&& !mem
->fictitious
&& !mem
->private) {
1594 assert(pmap_verify_free(mem
->phys_page
));
1599 _mutex_assert(&mem
->object
->Lock
, MA_OWNED
);
1600 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1603 panic("vm_page_free: freeing page on free list\n");
1606 vm_page_remove(mem
); /* clears tabled, object, offset */
1607 VM_PAGE_QUEUES_REMOVE(mem
); /* clears active or inactive */
1609 if (mem
->clustered
) {
1610 mem
->clustered
= FALSE
;
1611 vm_pagein_cluster_unused
++;
1614 if (mem
->wire_count
) {
1615 if (!mem
->private && !mem
->fictitious
)
1616 vm_page_wire_count
--;
1617 mem
->wire_count
= 0;
1618 assert(!mem
->gobbled
);
1619 } else if (mem
->gobbled
) {
1620 if (!mem
->private && !mem
->fictitious
)
1621 vm_page_wire_count
--;
1622 vm_page_gobble_count
--;
1624 mem
->gobbled
= FALSE
;
1627 vm_pageout_throttle_up(mem
);
1628 counter(++c_laundry_pages_freed
);
1631 PAGE_WAKEUP(mem
); /* clears wanted */
1634 vm_object_absent_release(object
);
1636 /* Some of these may be unnecessary */
1638 mem
->unlock_request
= 0;
1640 mem
->absent
= FALSE
;
1643 mem
->precious
= FALSE
;
1644 mem
->reference
= FALSE
;
1645 mem
->encrypted
= FALSE
;
1647 mem
->page_error
= KERN_SUCCESS
;
1650 mem
->private = FALSE
;
1651 mem
->fictitious
= TRUE
;
1652 mem
->phys_page
= vm_page_fictitious_addr
;
1654 if (mem
->fictitious
) {
1655 vm_page_release_fictitious(mem
);
1657 /* depends on the queues lock */
1658 if(mem
->zero_fill
) {
1660 mem
->zero_fill
= FALSE
;
1662 vm_page_init(mem
, mem
->phys_page
);
1663 vm_page_release(mem
);
1670 register vm_page_t mem
)
1672 register vm_page_t nxt
;
1673 register vm_page_t first
= NULL
;
1674 register vm_page_t last
= VM_PAGE_NULL
;
1675 register int pg_count
= 0;
1678 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1682 if (mem
->tabled
|| mem
->object
)
1683 panic("vm_page_free_list: freeing tabled page\n");
1684 if (mem
->inactive
|| mem
->active
|| mem
->free
)
1685 panic("vm_page_free_list: freeing page on list\n");
1687 assert(mem
->pageq
.prev
== NULL
);
1688 nxt
= (vm_page_t
)(mem
->pageq
.next
);
1691 vm_pagein_cluster_unused
++;
1694 vm_pageout_throttle_up(mem
);
1695 counter(++c_laundry_pages_freed
);
1699 PAGE_WAKEUP(mem
); /* clears wanted */
1702 mem
->fictitious
= TRUE
;
1704 if (!mem
->fictitious
) {
1705 /* depends on the queues lock */
1708 assert(!mem
->laundry
);
1709 vm_page_init(mem
, mem
->phys_page
);
1715 mem
->pageq
.next
= (queue_t
) first
;
1720 mem
->phys_page
= vm_page_fictitious_addr
;
1721 vm_page_release_fictitious(mem
);
1727 mutex_lock(&vm_page_queue_free_lock
);
1729 last
->pageq
.next
= (queue_entry_t
) vm_page_queue_free
;
1730 vm_page_queue_free
= first
;
1732 vm_page_free_count
+= pg_count
;
1734 if ((vm_page_free_wanted
> 0) &&
1735 (vm_page_free_count
>= vm_page_free_reserved
)) {
1736 unsigned int available_pages
;
1738 if (vm_page_free_count
>= vm_page_free_reserved
) {
1739 available_pages
= (vm_page_free_count
1740 - vm_page_free_reserved
);
1742 available_pages
= 0;
1745 if (available_pages
>= vm_page_free_wanted
) {
1746 vm_page_free_wanted
= 0;
1747 thread_wakeup((event_t
) &vm_page_free_count
);
1749 while (available_pages
--) {
1750 vm_page_free_wanted
--;
1751 thread_wakeup_one((event_t
) &vm_page_free_count
);
1755 mutex_unlock(&vm_page_queue_free_lock
);
1763 * Mark this page as wired down by yet
1764 * another map, removing it from paging queues
1767 * The page's object and the page queues must be locked.
1771 register vm_page_t mem
)
1774 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1779 _mutex_assert(&mem
->object
->Lock
, MA_OWNED
);
1780 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1782 if (mem
->wire_count
== 0) {
1783 VM_PAGE_QUEUES_REMOVE(mem
);
1784 if (!mem
->private && !mem
->fictitious
&& !mem
->gobbled
)
1785 vm_page_wire_count
++;
1787 vm_page_gobble_count
--;
1788 mem
->gobbled
= FALSE
;
1789 if(mem
->zero_fill
) {
1790 /* depends on the queues lock */
1792 mem
->zero_fill
= FALSE
;
		/*
		 *	The page could be encrypted, but
		 *	we don't have to decrypt it here
		 *	because we don't guarantee that the
		 *	data is actually valid at this point.
		 *	The page will get decrypted in
		 *	vm_fault_wire() if needed.
		 */
1804 assert(!mem
->gobbled
);
1811 * Mark this page as consumed by the vm/ipc/xmm subsystems.
1813 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
1817 register vm_page_t mem
)
1819 vm_page_lock_queues();
1822 assert(!mem
->gobbled
);
1823 assert(mem
->wire_count
== 0);
1825 if (!mem
->gobbled
&& mem
->wire_count
== 0) {
1826 if (!mem
->private && !mem
->fictitious
)
1827 vm_page_wire_count
++;
1829 vm_page_gobble_count
++;
1830 mem
->gobbled
= TRUE
;
1831 vm_page_unlock_queues();
1837 * Release one wiring of this page, potentially
1838 * enabling it to be paged again.
1840 * The page's object and the page queues must be locked.
1844 register vm_page_t mem
)
1847 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1850 assert(mem
->wire_count
> 0);
1853 _mutex_assert(&mem
->object
->Lock
, MA_OWNED
);
1854 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1856 if (--mem
->wire_count
== 0) {
1857 assert(!mem
->private && !mem
->fictitious
);
1858 vm_page_wire_count
--;
1859 assert(!mem
->laundry
);
1860 assert(mem
->object
!= kernel_object
);
1861 assert(mem
->pageq
.next
== NULL
&& mem
->pageq
.prev
== NULL
);
1862 queue_enter(&vm_page_queue_active
, mem
, vm_page_t
, pageq
);
1863 vm_page_active_count
++;
1865 mem
->reference
= TRUE
;
1870 * vm_page_deactivate:
1872 * Returns the given page to the inactive list,
1873 * indicating that no physical maps have access
1874 * to this page. [Used by the physical mapping system.]
1876 * The page queues must be locked.
1880 register vm_page_t m
)
1883 assert(m
->object
!= kernel_object
);
1885 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
1887 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1890 * This page is no longer very interesting. If it was
1891 * interesting (active or inactive/referenced), then we
1892 * clear the reference bit and (re)enter it in the
1893 * inactive queue. Note wired pages should not have
1894 * their reference bit cleared.
1896 if (m
->gobbled
) { /* can this happen? */
1897 assert(m
->wire_count
== 0);
1898 if (!m
->private && !m
->fictitious
)
1899 vm_page_wire_count
--;
1900 vm_page_gobble_count
--;
1903 if (m
->private || (m
->wire_count
!= 0))
1905 if (m
->active
|| (m
->inactive
&& m
->reference
)) {
1906 if (!m
->fictitious
&& !m
->absent
)
1907 pmap_clear_reference(m
->phys_page
);
1908 m
->reference
= FALSE
;
1909 VM_PAGE_QUEUES_REMOVE(m
);
1911 if (m
->wire_count
== 0 && !m
->inactive
) {
1912 m
->page_ticket
= vm_page_ticket
;
1913 vm_page_ticket_roll
++;
1915 if(vm_page_ticket_roll
== VM_PAGE_TICKETS_IN_ROLL
) {
1916 vm_page_ticket_roll
= 0;
1917 if(vm_page_ticket
== VM_PAGE_TICKET_ROLL_IDS
)
1923 assert(!m
->laundry
);
1924 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
1926 queue_enter(&vm_page_queue_zf
, m
, vm_page_t
, pageq
);
1928 queue_enter(&vm_page_queue_inactive
,
1929 m
, vm_page_t
, pageq
);
1934 vm_page_inactive_count
++;
1941 * Put the specified page on the active list (if appropriate).
1943 * The page queues must be locked.
1948 register vm_page_t m
)
1951 assert(m
->object
!= kernel_object
);
1953 _mutex_assert(&vm_page_queue_lock
, MA_OWNED
);
1956 assert(m
->wire_count
== 0);
1957 if (!m
->private && !m
->fictitious
)
1958 vm_page_wire_count
--;
1959 vm_page_gobble_count
--;
1966 assert(!m
->laundry
);
1968 queue_remove(&vm_page_queue_zf
, m
, vm_page_t
, pageq
);
1970 queue_remove(&vm_page_queue_inactive
,
1971 m
, vm_page_t
, pageq
);
1973 m
->pageq
.next
= NULL
;
1974 m
->pageq
.prev
= NULL
;
1976 vm_page_inactive_count
--;
1977 m
->inactive
= FALSE
;
1979 if (m
->wire_count
== 0) {
1982 panic("vm_page_activate: already active");
1984 assert(!m
->laundry
);
1985 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
1986 queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
1988 m
->reference
= TRUE
;
1990 vm_page_active_count
++;
1995 * vm_page_part_zero_fill:
1997 * Zero-fill a part of the page.
2000 vm_page_part_zero_fill(
2008 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2009 pmap_zero_part_page(m
->phys_page
, m_pa
, len
);
2012 tmp
= vm_page_grab();
2013 if (tmp
== VM_PAGE_NULL
) {
2014 vm_page_wait(THREAD_UNINT
);
2019 vm_page_zero_fill(tmp
);
2021 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
2023 if((m_pa
+ len
) < PAGE_SIZE
) {
2024 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
2025 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
2027 vm_page_copy(tmp
,m
);
2028 vm_page_lock_queues();
2030 vm_page_unlock_queues();
2036 * vm_page_zero_fill:
2038 * Zero-fill the specified page.
2045 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2046 (integer_t
)m
->object
, (integer_t
)m
->offset
, (integer_t
)m
, 0,0);
2050 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
2051 pmap_zero_page(m
->phys_page
);
2055 * vm_page_part_copy:
2057 * copy part of one page to another
2068 VM_PAGE_CHECK(src_m
);
2069 VM_PAGE_CHECK(dst_m
);
2071 pmap_copy_part_page(src_m
->phys_page
, src_pa
,
2072 dst_m
->phys_page
, dst_pa
, len
);
2078 * Copy one page to another
2081 * The source page should not be encrypted. The caller should
2082 * make sure the page is decrypted first, if necessary.
2091 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2092 (integer_t
)src_m
->object
, src_m
->offset
,
2093 (integer_t
)dest_m
->object
, dest_m
->offset
,
2096 VM_PAGE_CHECK(src_m
);
2097 VM_PAGE_CHECK(dest_m
);
2101 * The source page should not be encrypted at this point.
2102 * The destination page will therefore not contain encrypted
2103 * data after the copy.
2105 if (src_m
->encrypted
) {
2106 panic("vm_page_copy: source page %p is encrypted\n", src_m
);
2108 dest_m
->encrypted
= FALSE
;
2110 pmap_copy_page(src_m
->phys_page
, dest_m
->phys_page
);
2114 * Currently, this is a primitive allocator that grabs
2115 * free pages from the system, sorts them by physical
2116 * address, then searches for a region large enough to
2117 * satisfy the user's request.
2119 * Additional levels of effort:
2120 * + steal clean active/inactive pages
2121 * + force pageouts of dirty pages
2122 * + maintain a map of available physical
2128 * Check that the list of pages is ordered by
2129 * ascending physical address and has no holes.
2131 int vm_page_verify_contiguous(
2133 unsigned int npages
);
2136 vm_page_verify_contiguous(
2138 unsigned int npages
)
2140 register vm_page_t m
;
2141 unsigned int page_count
;
2142 vm_offset_t prev_addr
;
2144 prev_addr
= pages
->phys_page
;
2146 for (m
= NEXT_PAGE(pages
); m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
2147 if (m
->phys_page
!= prev_addr
+ 1) {
2148 printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n",
2149 m
, prev_addr
, m
->phys_page
);
2150 printf("pages 0x%x page_count %d\n", pages
, page_count
);
2151 panic("vm_page_verify_contiguous: not contiguous!");
2153 prev_addr
= m
->phys_page
;
2156 if (page_count
!= npages
) {
2157 printf("pages 0x%x actual count 0x%x but requested 0x%x\n",
2158 pages
, page_count
, npages
);
2159 panic("vm_page_verify_contiguous: count error");
2163 #endif /* MACH_ASSERT */
2166 cpm_counter(unsigned int vpfls_pages_handled
= 0;)
2167 cpm_counter(unsigned int vpfls_head_insertions
= 0;)
2168 cpm_counter(unsigned int vpfls_tail_insertions
= 0;)
2169 cpm_counter(unsigned int vpfls_general_insertions
= 0;)
2170 cpm_counter(unsigned int vpfc_failed
= 0;)
2171 cpm_counter(unsigned int vpfc_satisfied
= 0;)
2174 * Find a region large enough to contain at least npages
2175 * of contiguous physical memory.
2178 * - Called while holding vm_page_queue_free_lock.
2179 * - Doesn't respect vm_page_free_reserved; caller
2180 * must not ask for more pages than are legal to grab.
2182 * Returns a pointer to a list of gobbled pages or VM_PAGE_NULL.
2185 * Loop over the free list, extracting one page at a time and
2186 * inserting those into a sorted sub-list. We stop as soon as
2187 * there's a contiguous range within the sorted list that can
2188 * satisfy the contiguous memory request. This contiguous sub-
2189 * list is chopped out of the sorted sub-list and the remainder
2190 * of the sorted sub-list is put back onto the beginning of the
2194 vm_page_find_contiguous(
2195 unsigned int contig_pages
)
2197 vm_page_t sort_list
;
2198 vm_page_t
*contfirstprev
, contlast
;
2200 ppnum_t prevcontaddr
;
2201 ppnum_t nextcontaddr
;
2202 unsigned int npages
;
2206 _mutex_assert(&vm_page_queue_free_lock
, MA_OWNED
);
2210 * Verify pages in the free list..
2213 for (m
= vm_page_queue_free
; m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
))
2215 if (npages
!= vm_page_free_count
)
2216 panic("vm_sort_free_list: prelim: npages %u free_count %d",
2217 npages
, vm_page_free_count
);
2218 #endif /* MACH_ASSERT */
2220 if (contig_pages
== 0 || vm_page_queue_free
== VM_PAGE_NULL
)
2221 return VM_PAGE_NULL
;
#define PPNUM_PREV(x)		(((x) > 0) ? ((x) - 1) : 0)
#define PPNUM_NEXT(x)		(((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX)
#define SET_NEXT_PAGE(m,n)	((m)->pageq.next = (struct queue_entry *) (n))
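
/*
 * Illustrative sketch (not compiled): PPNUM_PREV and PPNUM_NEXT saturate
 * at the ends of the physical page-number space, so the run-extension
 * checks below never wrap around when a candidate page sits at 0 or
 * PPNUM_MAX.
 */
#if 0
static void
ppnum_neighbor_example(void)
{
	assert(PPNUM_PREV(0) == 0);			/* saturates low */
	assert(PPNUM_NEXT(PPNUM_MAX) == PPNUM_MAX);	/* saturates high */
	assert(PPNUM_PREV(5) == 4 && PPNUM_NEXT(5) == 6);
}
#endif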
2228 contfirstprev
= &sort_list
;
2229 contlast
= sort_list
= vm_page_queue_free
;
2230 vm_page_queue_free
= NEXT_PAGE(sort_list
);
2231 SET_NEXT_PAGE(sort_list
, VM_PAGE_NULL
);
2232 prevcontaddr
= PPNUM_PREV(sort_list
->phys_page
);
2233 nextcontaddr
= PPNUM_NEXT(sort_list
->phys_page
);
2235 while (npages
< contig_pages
&&
2236 (m
= vm_page_queue_free
) != VM_PAGE_NULL
)
2238 cpm_counter(++vpfls_pages_handled
);
2240 /* prepend to existing run? */
2241 if (m
->phys_page
== prevcontaddr
)
2243 vm_page_queue_free
= NEXT_PAGE(m
);
2244 cpm_counter(++vpfls_head_insertions
);
2245 prevcontaddr
= PPNUM_PREV(prevcontaddr
);
2246 SET_NEXT_PAGE(m
, *contfirstprev
);
2249 continue; /* no tail expansion check needed */
2252 /* append to tail of existing run? */
2253 else if (m
->phys_page
== nextcontaddr
)
2255 vm_page_queue_free
= NEXT_PAGE(m
);
2256 cpm_counter(++vpfls_tail_insertions
);
2257 nextcontaddr
= PPNUM_NEXT(nextcontaddr
);
2258 SET_NEXT_PAGE(m
, NEXT_PAGE(contlast
));
2259 SET_NEXT_PAGE(contlast
, m
);
2264 /* prepend to the very front of sorted list? */
2265 else if (m
->phys_page
< sort_list
->phys_page
)
2267 vm_page_queue_free
= NEXT_PAGE(m
);
2268 cpm_counter(++vpfls_general_insertions
);
2269 prevcontaddr
= PPNUM_PREV(m
->phys_page
);
2270 nextcontaddr
= PPNUM_NEXT(m
->phys_page
);
2271 SET_NEXT_PAGE(m
, sort_list
);
2272 contfirstprev
= &sort_list
;
2273 contlast
= sort_list
= m
;
2277 else /* get to proper place for insertion */
2279 if (m
->phys_page
< nextcontaddr
)
2281 prevcontaddr
= PPNUM_PREV(sort_list
->phys_page
);
2282 nextcontaddr
= PPNUM_NEXT(sort_list
->phys_page
);
2283 contfirstprev
= &sort_list
;
2284 contlast
= sort_list
;
2287 for (m1
= NEXT_PAGE(contlast
);
2288 npages
< contig_pages
&&
2289 m1
!= VM_PAGE_NULL
&& m1
->phys_page
< m
->phys_page
;
2292 if (m1
->phys_page
!= nextcontaddr
) {
2293 prevcontaddr
= PPNUM_PREV(m1
->phys_page
);
2294 contfirstprev
= NEXT_PAGE_PTR(contlast
);
2299 nextcontaddr
= PPNUM_NEXT(m1
->phys_page
);
2304 * We may actually already have enough.
2305 * This could happen if a previous prepend
2306 * joined up two runs to meet our needs.
2307 * If so, bail before we take the current
2308 * page off the free queue.
2310 if (npages
== contig_pages
)
2313 if (m
->phys_page
!= nextcontaddr
)
2315 contfirstprev
= NEXT_PAGE_PTR(contlast
);
2316 prevcontaddr
= PPNUM_PREV(m
->phys_page
);
2317 nextcontaddr
= PPNUM_NEXT(m
->phys_page
);
2320 nextcontaddr
= PPNUM_NEXT(nextcontaddr
);
2323 vm_page_queue_free
= NEXT_PAGE(m
);
2324 cpm_counter(++vpfls_general_insertions
);
2325 SET_NEXT_PAGE(m
, NEXT_PAGE(contlast
));
2326 SET_NEXT_PAGE(contlast
, m
);
2330 /* See how many pages are now contiguous after the insertion */
2331 for (m1
= NEXT_PAGE(m
);
2332 npages
< contig_pages
&&
2333 m1
!= VM_PAGE_NULL
&& m1
->phys_page
== nextcontaddr
;
2336 nextcontaddr
= PPNUM_NEXT(nextcontaddr
);
2342 /* how did we do? */
2343 if (npages
== contig_pages
)
2345 cpm_counter(++vpfc_satisfied
);
2347 /* remove the contiguous range from the sorted list */
2349 *contfirstprev
= NEXT_PAGE(contlast
);
2350 SET_NEXT_PAGE(contlast
, VM_PAGE_NULL
);
2351 assert(vm_page_verify_contiguous(m
, npages
));
2353 /* inline vm_page_gobble() for each returned page */
2354 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
2356 assert(!m1
->wanted
);
2357 assert(!m1
->laundry
);
2359 m1
->no_isync
= TRUE
;
2362 vm_page_wire_count
+= npages
;
2363 vm_page_gobble_count
+= npages
;
2364 vm_page_free_count
-= npages
;
2366 /* stick free list at the tail of the sorted list */
2367 while ((m1
= *contfirstprev
) != VM_PAGE_NULL
)
2368 contfirstprev
= (vm_page_t
*)&m1
->pageq
.next
;
2369 *contfirstprev
= vm_page_queue_free
;
2372 vm_page_queue_free
= sort_list
;
2377 * Allocate a list of contiguous, wired pages.
2385 register vm_page_t m
;
2387 unsigned int npages
;
2388 unsigned int vm_pages_available
;
2391 if (size
% page_size
!= 0)
2392 return KERN_INVALID_ARGUMENT
;
2394 vm_page_lock_queues();
2395 mutex_lock(&vm_page_queue_free_lock
);
2398 * Should also take active and inactive pages
2399 * into account... One day...
2401 npages
= size
/ page_size
;
2402 vm_pages_available
= vm_page_free_count
- vm_page_free_reserved
;
2404 if (npages
> vm_pages_available
) {
2405 mutex_unlock(&vm_page_queue_free_lock
);
2406 vm_page_unlock_queues();
2407 return KERN_RESOURCE_SHORTAGE
;
2411 * Obtain a pointer to a subset of the free
2412 * list large enough to satisfy the request;
2413 * the region will be physically contiguous.
2415 pages
= vm_page_find_contiguous(npages
);
2417 /* adjust global freelist counts and determine need for wakeups */
2418 if (vm_page_free_count
< vm_page_free_count_minimum
)
2419 vm_page_free_count_minimum
= vm_page_free_count
;
2421 wakeup
= ((vm_page_free_count
< vm_page_free_min
) ||
2422 ((vm_page_free_count
< vm_page_free_target
) &&
2423 (vm_page_inactive_count
< vm_page_inactive_target
)));
2425 mutex_unlock(&vm_page_queue_free_lock
);
2427 if (pages
== VM_PAGE_NULL
) {
2428 vm_page_unlock_queues();
2429 return KERN_NO_SPACE
;
2433 * Walk the returned list, wiring the pages.
2436 for (m
= pages
; m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
2438 * Essentially inlined vm_page_wire.
2441 assert(!m
->inactive
);
2442 assert(!m
->private);
2443 assert(!m
->fictitious
);
2444 assert(m
->wire_count
== 0);
2448 --vm_page_gobble_count
;
2450 vm_page_unlock_queues();
2453 thread_wakeup((event_t
) &vm_page_free_wanted
);
2456 * The CPM pages should now be available and
2457 * ordered by ascending physical address.
2459 assert(vm_page_verify_contiguous(pages
, npages
));
2462 return KERN_SUCCESS
;
2466 #include <mach_vm_debug.h>
2469 #include <mach_debug/hash_info.h>
2470 #include <vm/vm_debug.h>
2473 * Routine: vm_page_info
2475 * Return information about the global VP table.
2476 * Fills the buffer with as much information as possible
2477 * and returns the desired size of the buffer.
2479 * Nothing locked. The caller should provide
2480 * possibly-pageable memory.
2485 hash_info_bucket_t
*info
,
2490 if (vm_page_bucket_count
< count
)
2491 count
= vm_page_bucket_count
;
2493 for (i
= 0; i
< count
; i
++) {
2494 vm_page_bucket_t
*bucket
= &vm_page_buckets
[i
];
2495 unsigned int bucket_count
= 0;
2498 simple_lock(&vm_page_bucket_lock
);
2499 for (m
= bucket
->pages
; m
!= VM_PAGE_NULL
; m
= m
->next
)
2501 simple_unlock(&vm_page_bucket_lock
);
2503 /* don't touch pageable memory while holding locks */
2504 info
[i
].hib_count
= bucket_count
;
2507 return vm_page_bucket_count
;
2509 #endif /* MACH_VM_DEBUG */
2511 #include <mach_kdb.h>
2514 #include <ddb/db_output.h>
2515 #include <vm/vm_print.h>
2516 #define printf kdbprintf
2519 * Routine: vm_page_print [exported]
2527 p
= (vm_page_t
) (long) db_addr
;
2529 iprintf("page 0x%x\n", p
);
2533 iprintf("object=0x%x", p
->object
);
2534 printf(", offset=0x%x", p
->offset
);
2535 printf(", wire_count=%d", p
->wire_count
);
2537 iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
2538 (p
->inactive
? "" : "!"),
2539 (p
->active
? "" : "!"),
2540 (p
->gobbled
? "" : "!"),
2541 (p
->laundry
? "" : "!"),
2542 (p
->free
? "" : "!"),
2543 (p
->reference
? "" : "!"),
2544 (p
->encrypted
? "" : "!"));
2545 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
2546 (p
->busy
? "" : "!"),
2547 (p
->wanted
? "" : "!"),
2548 (p
->tabled
? "" : "!"),
2549 (p
->fictitious
? "" : "!"),
2550 (p
->private ? "" : "!"),
2551 (p
->precious
? "" : "!"));
2552 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
2553 (p
->absent
? "" : "!"),
2554 (p
->error
? "" : "!"),
2555 (p
->dirty
? "" : "!"),
2556 (p
->cleaning
? "" : "!"),
2557 (p
->pageout
? "" : "!"),
2558 (p
->clustered
? "" : "!"));
2559 iprintf("%slock_supplied, %soverwriting, %srestart, %sunusual\n",
2560 (p
->lock_supplied
? "" : "!"),
2561 (p
->overwriting
? "" : "!"),
2562 (p
->restart
? "" : "!"),
2563 (p
->unusual
? "" : "!"));
2565 iprintf("phys_page=0x%x", p
->phys_page
);
2566 printf(", page_error=0x%x", p
->page_error
);
2567 printf(", page_lock=0x%x", p
->page_lock
);
2568 printf(", unlock_request=%d\n", p
->unlock_request
);
2572 #endif /* MACH_KDB */