/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 *  Resident memory management module.
 */
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>         /* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
/*
 *  Associated with each page of user-allocatable memory is a
 *  page structure.
 */

/*
 *  These variables record the values returned by vm_page_bootstrap,
 *  for debugging purposes.  The implementation of pmap_steal_memory
 *  and pmap_startup here also uses them internally.
 */

vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;
/*
 *  The vm_page_lookup() routine, which provides for fast
 *  (virtual memory object, offset) to page lookup, employs
 *  the following hash table.  The vm_page_{insert,remove}
 *  routines install and remove associations in the table.
 *  [This table is often called the virtual-to-physical,
 *  or VP, table.]
 */
typedef struct {
    vm_page_t   pages;
#if MACH_PAGE_HASH_STATS
    int         cur_count;      /* current count */
    int         hi_count;       /* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

vm_page_bucket_t *vm_page_buckets;          /* Array of buckets */
unsigned int    vm_page_bucket_count = 0;   /* How big is array? */
unsigned int    vm_page_hash_mask;          /* Mask for hash function */
unsigned int    vm_page_hash_shift;         /* Shift for hash function */
decl_simple_lock_data(,vm_page_bucket_lock)
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
    int i;
    int numbuckets = 0;
    int highsum = 0;
    int maxdepth = 0;

    for (i = 0; i < vm_page_bucket_count; i++) {
        if (vm_page_buckets[i].hi_count) {
            numbuckets++;
            highsum += vm_page_buckets[i].hi_count;
            if (vm_page_buckets[i].hi_count > maxdepth)
                maxdepth = vm_page_buckets[i].hi_count;
        }
    }
    printf("Total number of buckets: %d\n", vm_page_bucket_count);
    printf("Number used buckets:     %d = %d%%\n",
        numbuckets, 100*numbuckets/vm_page_bucket_count);
    printf("Number unused buckets:   %d = %d%%\n",
        vm_page_bucket_count - numbuckets,
        100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
    printf("Sum of bucket max depth: %d\n", highsum);
    printf("Average bucket depth:    %d.%2d\n",
        highsum/vm_page_bucket_count,
        highsum%vm_page_bucket_count);
    printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *  The virtual page size is currently implemented as a runtime
 *  variable, but is constant once initialized using vm_set_page_size.
 *  This initialization must be done in the machine-dependent
 *  bootstrap sequence, before calling other machine-independent
 *  functions.
 *
 *  All references to the virtual page size outside this
 *  module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *  constants.
 */
#ifndef PAGE_SIZE_FIXED
vm_size_t   page_size  = 4096;
vm_size_t   page_mask  = 4095;
int         page_shift = 12;
#endif  /* PAGE_SIZE_FIXED */
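/*
 *  Illustrative sketch (not part of the original source): the three
 *  page-size quantities are tied together as mask = size - 1 and
 *  size = 1 << shift, so truncation, rounding and offset extraction
 *  are all mask-and-shift operations.  A minimal user-space
 *  demonstration, with hypothetical names, might look like this.
 */
#if 0   /* example only */
#include <assert.h>

#define EX_PAGE_SHIFT   12
#define EX_PAGE_SIZE    (1UL << EX_PAGE_SHIFT)      /* 4096 */
#define EX_PAGE_MASK    (EX_PAGE_SIZE - 1)          /* 4095 */

static unsigned long ex_trunc_page(unsigned long a) { return a & ~EX_PAGE_MASK; }
static unsigned long ex_round_page(unsigned long a) { return (a + EX_PAGE_MASK) & ~EX_PAGE_MASK; }

static void ex_page_size_demo(void)
{
    unsigned long addr = 0x12345;

    assert(ex_trunc_page(addr) == 0x12000);     /* start of containing page */
    assert(ex_round_page(addr) == 0x13000);     /* next page boundary */
    assert((addr & EX_PAGE_MASK) == 0x345);     /* offset within the page */
    assert((addr >> EX_PAGE_SHIFT) == 0x12);    /* page frame number */
}
#endif  /* example only */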
/*
 *  Resident page structures are initialized from
 *  a template (see vm_page_alloc).
 *
 *  When adding a new field to the virtual memory
 *  object structure, be sure to add initialization
 *  (see vm_page_bootstrap).
 */
struct vm_page  vm_page_template;
/*
 *  Resident pages that represent real memory
 *  are allocated from a free list.
 */
vm_page_t   vm_page_queue_free;
vm_page_t   vm_page_queue_fictitious;
decl_mutex_data(,vm_page_queue_free_lock)
unsigned int    vm_page_free_wanted;
int     vm_page_free_count;
int     vm_page_fictitious_count;

unsigned int    vm_page_free_count_minimum; /* debugging */
/*
 *  Occasionally, the virtual memory system uses
 *  resident page structures that do not refer to
 *  real pages, for example to leave a page with
 *  important state information in the VP table.
 *
 *  These page structures are allocated the way
 *  most other kernel structures are.
 */
zone_t  vm_page_zone;
decl_mutex_data(,vm_page_alloc_lock)
/*
 *  Fictitious pages don't have a physical address,
 *  but we must initialize phys_addr to something.
 *  For debugging, this should be a strange value
 *  that the pmap module can recognize in assertions.
 */
vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
/*
 *  Resident page structures are also chained on
 *  queues that are used by the page replacement
 *  system (pageout daemon).  These queues are
 *  defined here, but are shared by the pageout
 *  module.
 */
queue_head_t    vm_page_queue_active;
queue_head_t    vm_page_queue_inactive;
decl_mutex_data(,vm_page_queue_lock)
int     vm_page_active_count;
int     vm_page_inactive_count;
int     vm_page_wire_count;
int     vm_page_gobble_count = 0;
int     vm_page_wire_count_warning = 0;
int     vm_page_gobble_count_warning = 0;
/* the following fields are protected by the vm_page_queue_lock */
queue_head_t    vm_page_queue_limbo;
int     vm_page_limbo_count = 0;        /* total pages in limbo */
int     vm_page_limbo_real_count = 0;   /* real pages in limbo */
int     vm_page_pin_count = 0;          /* number of pinned pages */

decl_simple_lock_data(,vm_page_preppin_lock)
/*
 *  Several page replacement parameters are also
 *  shared with this module, so that page allocation
 *  (done here in vm_page_alloc) can trigger the
 *  pageout daemon.
 */
int vm_page_free_target = 0;
int vm_page_free_min = 0;
int vm_page_inactive_target = 0;
int vm_page_free_reserved = 0;
int vm_page_laundry_count = 0;
/*
 *  The VM system has a couple of heuristics for deciding
 *  that pages are "uninteresting" and should be placed
 *  on the inactive queue as likely candidates for replacement.
 *  These variables let the heuristics be controlled at run-time
 *  to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;
/*
 *  vm_set_page_size:
 *
 *  Sets the page size, perhaps based upon the memory
 *  size.  Must be called before any use of page-size
 *  dependent functions.
 *
 *  Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
#ifndef PAGE_SIZE_FIXED
    page_mask = page_size - 1;

    if ((page_mask & page_size) != 0)
        panic("vm_set_page_size: page size not a power of two");

    for (page_shift = 0; ; page_shift++)
        if ((1 << page_shift) == page_size)
            break;
#endif  /* PAGE_SIZE_FIXED */
}
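/*
 *  Illustrative sketch (not part of the original source): for a
 *  power-of-two page size, size & (size - 1) is zero, which is the
 *  check vm_set_page_size performs, and the shift can be recovered by
 *  counting doublings.  A hypothetical user-space version:
 */
#if 0   /* example only */
#include <assert.h>

static int ex_is_power_of_two(unsigned long size)
{
    return size != 0 && ((size - 1) & size) == 0;
}

static unsigned int ex_log2_exact(unsigned long size)
{
    unsigned int shift;

    assert(ex_is_power_of_two(size));
    for (shift = 0; (1UL << shift) != size; shift++)
        continue;
    return shift;
}

static void ex_page_size_checks(void)
{
    assert(ex_is_power_of_two(4096) && ex_log2_exact(4096) == 12);
    assert(!ex_is_power_of_two(4095));
    assert(!ex_is_power_of_two(0));
}
#endif  /* example only */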
/*
 *  vm_page_bootstrap:
 *
 *  Initializes the resident memory module.
 *
 *  Allocates memory for the page cells, and
 *  for the object/offset-to-page hash table headers.
 *  Each page cell is initialized and placed on the free list.
 *  Returns the range of available kernel virtual memory.
 */
void
vm_page_bootstrap(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    register vm_page_t  m;
    int                 i;
    unsigned int        log1;
    unsigned int        log2;
    unsigned int        size;
    /*
     *  Initialize the vm_page template.
     */

    m = &vm_page_template;
    m->object = VM_OBJECT_NULL;     /* reset later */
    m->offset = 0;                  /* reset later */

    m->reference = FALSE;
    m->list_req_pending = FALSE;
    m->fictitious = FALSE;
    m->clustered = FALSE;
    m->lock_supplied = FALSE;

    m->phys_addr = 0;               /* reset later */

    m->page_lock = VM_PROT_NONE;
    m->unlock_request = VM_PROT_NONE;
    m->page_error = KERN_SUCCESS;
    /*
     *  Initialize the page queues.
     */

    mutex_init(&vm_page_queue_free_lock, ETAP_VM_PAGEQ_FREE);
    mutex_init(&vm_page_queue_lock, ETAP_VM_PAGEQ);
    simple_lock_init(&vm_page_preppin_lock, ETAP_VM_PREPPIN);

    vm_page_queue_free = VM_PAGE_NULL;
    vm_page_queue_fictitious = VM_PAGE_NULL;
    queue_init(&vm_page_queue_active);
    queue_init(&vm_page_queue_inactive);
    queue_init(&vm_page_queue_limbo);

    vm_page_free_wanted = 0;
    /*
     *  Steal memory for the map and zone subsystems.
     */

    vm_map_steal_memory();
    /*
     *  Allocate (and initialize) the virtual-to-physical
     *  table hash buckets.
     *
     *  The number of buckets should be a power of two to
     *  get a good hash function.  The following computation
     *  chooses the first power of two that is greater
     *  than the number of physical pages in the system.
     */

    simple_lock_init(&vm_page_bucket_lock, ETAP_VM_BUCKET);
    if (vm_page_bucket_count == 0) {
        unsigned int npages = pmap_free_pages();

        vm_page_bucket_count = 1;
        while (vm_page_bucket_count < npages)
            vm_page_bucket_count <<= 1;
    }

    vm_page_hash_mask = vm_page_bucket_count - 1;
    /*
     *  Calculate object shift value for hashing algorithm:
     *      O = log2(sizeof(struct vm_object))
     *      B = log2(vm_page_bucket_count)
     *      hash shifts the object left by vm_page_hash_shift bits
     */
    size = vm_page_bucket_count;
    for (log1 = 0; size > 1; log1++)
        size /= 2;
    size = sizeof(struct vm_object);
    for (log2 = 0; size > 1; log2++)
        size /= 2;
    vm_page_hash_shift = log1/2 - log2 + 1;

    if (vm_page_hash_mask & vm_page_bucket_count)
        printf("vm_page_bootstrap: WARNING -- strange page hash\n");
    vm_page_buckets = (vm_page_bucket_t *)
        pmap_steal_memory(vm_page_bucket_count *
                  sizeof(vm_page_bucket_t));

    for (i = 0; i < vm_page_bucket_count; i++) {
        register vm_page_bucket_t *bucket = &vm_page_buckets[i];

        bucket->pages = VM_PAGE_NULL;
#if MACH_PAGE_HASH_STATS
        bucket->cur_count = 0;
        bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
    }
    /*
     *  Machine-dependent code allocates the resident page table.
     *  It uses vm_page_init to initialize the page frames.
     *  The code also returns to us the virtual space available
     *  to the kernel.  We don't trust the pmap module
     *  to get the alignment right.
     */

    pmap_startup(&virtual_space_start, &virtual_space_end);
    virtual_space_start = round_page(virtual_space_start);
    virtual_space_end = trunc_page(virtual_space_end);

    *startp = virtual_space_start;
    *endp = virtual_space_end;

    /*
     *  Compute the initial "wire" count.
     *  Up until now, the pages which have been set aside are not under
     *  the VM system's control, so although they aren't explicitly
     *  wired, they nonetheless can't be moved.  At this moment,
     *  all VM managed pages are "free", courtesy of pmap_startup.
     */
    vm_page_wire_count = atop(mem_size) - vm_page_free_count;  /* initial value */

    printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
    vm_page_free_count_minimum = vm_page_free_count;
}
#ifndef MACHINE_PAGES
/*
 *  We implement pmap_steal_memory and pmap_startup with the help
 *  of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

vm_offset_t
pmap_steal_memory(
    vm_size_t size)
{
    vm_offset_t addr, vaddr, paddr;

    /*
     *  We round the size to a round multiple.
     */

    size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

    /*
     *  If this is the first call to pmap_steal_memory,
     *  we have to initialize ourself.
     */

    if (virtual_space_start == virtual_space_end) {
        pmap_virtual_space(&virtual_space_start, &virtual_space_end);

        /*
         *  The initial values must be aligned properly, and
         *  we don't trust the pmap module to do it right.
         */

        virtual_space_start = round_page(virtual_space_start);
        virtual_space_end = trunc_page(virtual_space_end);
    }

    /*
     *  Allocate virtual memory for this request.
     */

    addr = virtual_space_start;
    virtual_space_start += size;

    kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size);   /* (TEST/DEBUG) */

    /*
     *  Allocate and map physical pages to back new virtual pages.
     */

    for (vaddr = round_page(addr);
         vaddr < addr + size;
         vaddr += PAGE_SIZE) {
        if (!pmap_next_page(&paddr))
            panic("pmap_steal_memory");

        /*
         *  XXX Logically, these mappings should be wired,
         *  but some pmap modules barf if they are.
         */

        pmap_enter(kernel_pmap, vaddr, paddr,
               VM_PROT_READ|VM_PROT_WRITE, FALSE);

        /*
         *  Account for newly stolen memory
         */
        vm_page_wire_count++;
    }

    return addr;
}
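/*
 *  Illustrative sketch (not part of the original source): the size
 *  rounding above uses the usual "add (alignment - 1), then mask"
 *  idiom, which works only when the alignment is a power of two.  A
 *  hypothetical stand-alone version:
 */
#if 0   /* example only */
#include <assert.h>
#include <stddef.h>

static size_t ex_round_up(size_t size, size_t align)
{
    /* align must be a power of two */
    assert(align != 0 && (align & (align - 1)) == 0);
    return (size + align - 1) & ~(align - 1);
}

static void ex_round_up_demo(void)
{
    assert(ex_round_up(0, sizeof (void *)) == 0);
    assert(ex_round_up(1, 8) == 8);
    assert(ex_round_up(8, 8) == 8);
    assert(ex_round_up(13, 8) == 16);
}
#endif  /* example only */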
void
pmap_startup(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    unsigned int i, npages, pages_initialized;
    vm_page_t   pages;
    vm_offset_t paddr;

    /*
     *  We calculate how many page frames we will have
     *  and then allocate the page structures in one chunk.
     */

    npages = ((PAGE_SIZE * pmap_free_pages() +
           (round_page(virtual_space_start) - virtual_space_start)) /
          (PAGE_SIZE + sizeof *pages));

    pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);

    /*
     *  Initialize the page frames.
     */

    for (i = 0, pages_initialized = 0; i < npages; i++) {
        if (!pmap_next_page(&paddr))
            break;

        vm_page_init(&pages[i], paddr);
        pages_initialized++;
    }

    /*
     * Release pages in reverse order so that physical pages
     * initially get allocated in ascending addresses. This keeps
     * the devices (which must address physical memory) happy if
     * they require several consecutive pages.
     */

    for (i = pages_initialized; i > 0; i--) {
        vm_page_release(&pages[i - 1]);
    }

    /*
     *  We have to re-align virtual_space_start,
     *  because pmap_steal_memory has been using it.
     */

    virtual_space_start = round_page(virtual_space_start);

    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
#endif  /* MACHINE_PAGES */
/*
 *  Routine:    vm_page_module_init
 *  Purpose:
 *      Second initialization pass, to be done after
 *      the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
    vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
                 0, PAGE_SIZE, "vm pages");

#if ZONE_DEBUG
    zone_debug_disable(vm_page_zone);
#endif  /* ZONE_DEBUG */

    zone_change(vm_page_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_zone, Z_FOREIGN, TRUE);

    /*
     * Adjust zone statistics to account for the real pages allocated
     * in vm_page_create(). [Q: is this really what we want?]
     */
    vm_page_zone->count += vm_page_pages;
    vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

    mutex_init(&vm_page_alloc_lock, ETAP_VM_PAGE_ALLOC);
}
/*
 *  Routine:    vm_page_create
 *  Purpose:
 *      After the VM system is up, machine-dependent code
 *      may stumble across more physical memory.  For example,
 *      memory that it was reserving for a frame buffer.
 *      vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
    vm_offset_t start,
    vm_offset_t end)
{
    vm_offset_t paddr;
    vm_page_t   m;

    for (paddr = round_page(start);
         paddr < trunc_page(end);
         paddr += PAGE_SIZE) {
        while ((m = (vm_page_t) vm_page_grab_fictitious())
            == VM_PAGE_NULL)
            vm_page_more_fictitious();

        vm_page_init(m, paddr);
        m->fictitious = FALSE;
        vm_page_release(m);
    }
}
/*
 *  vm_page_hash:
 *
 *  Distributes the object/offset key pair among hash buckets.
 *
 *  NOTE:   To get a good hash function, the bucket count should
 *          be a power of two.
 */
#define vm_page_hash(object, offset) (\
    ( ((natural_t)(vm_offset_t)object<<vm_page_hash_shift) + (natural_t)atop(offset))\
     & vm_page_hash_mask)
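/*
 *  Illustrative sketch (not part of the original source): with a
 *  power-of-two bucket count, masking with vm_page_hash_mask reduces
 *  the mixed key to a bucket index without a division.  A hypothetical
 *  user-space analogue of the same hashing scheme:
 */
#if 0   /* example only */
#include <assert.h>

#define EX_BUCKET_COUNT 1024                /* power of two */
#define EX_BUCKET_MASK  (EX_BUCKET_COUNT - 1)
#define EX_HASH_SHIFT   5                   /* plays the role of vm_page_hash_shift */
#define EX_PAGE_SHIFT   12

static unsigned int ex_page_hash(unsigned long object, unsigned long offset)
{
    /* shift the object pointer, add the page index, mask into range */
    return (unsigned int)
        (((object << EX_HASH_SHIFT) + (offset >> EX_PAGE_SHIFT)) & EX_BUCKET_MASK);
}

static void ex_page_hash_demo(void)
{
    unsigned int h = ex_page_hash(0x80001234UL, 0x5000);

    assert(h < EX_BUCKET_COUNT);            /* always a valid bucket index */
    /* same key always hashes to the same bucket */
    assert(h == ex_page_hash(0x80001234UL, 0x5000));
}
#endif  /* example only */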
/*
 *  vm_page_insert:     [ internal use only ]
 *
 *  Inserts the given mem entry into the object/object-page
 *  table and object list.
 *
 *  The object must be locked.
 */
void
vm_page_insert(
    register vm_page_t              mem,
    register vm_object_t            object,
    register vm_object_offset_t     offset)
{
    register vm_page_bucket_t *bucket;

    XPR(XPR_VM_PAGE,
        "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
        (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);

    if (mem->tabled)
        panic("vm_page_insert");

    assert(!object->internal || offset < object->size);

    /* only insert "pageout" pages into "pageout" objects,
     * and normal pages into normal objects */
    assert(object->pageout == mem->pageout);

    /*
     *  Record the object/offset pair in this page
     */

    mem->object = object;
    mem->offset = offset;

    /*
     *  Insert it into the object_object/offset hash table
     */

    bucket = &vm_page_buckets[vm_page_hash(object, offset)];
    simple_lock(&vm_page_bucket_lock);
    mem->next = bucket->pages;
    bucket->pages = mem;
#if MACH_PAGE_HASH_STATS
    if (++bucket->cur_count > bucket->hi_count)
        bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
    simple_unlock(&vm_page_bucket_lock);

    /*
     *  Now link into the object's list of backed pages.
     */

    queue_enter(&object->memq, mem, vm_page_t, listq);
    mem->tabled = TRUE;

    /*
     *  Show that the object has one more resident page.
     */

    object->resident_page_count++;
}
/*
 *  vm_page_replace:
 *
 *  Exactly like vm_page_insert, except that we first
 *  remove any existing page at the given offset in object.
 *
 *  The object and page queues must be locked.
 */
void
vm_page_replace(
    register vm_page_t              mem,
    register vm_object_t            object,
    register vm_object_offset_t     offset)
{
    register vm_page_bucket_t *bucket;

    if (mem->tabled)
        panic("vm_page_replace");

    /*
     *  Record the object/offset pair in this page
     */

    mem->object = object;
    mem->offset = offset;

    /*
     *  Insert it into the object_object/offset hash table,
     *  replacing any page that might have been there.
     */

    bucket = &vm_page_buckets[vm_page_hash(object, offset)];
    simple_lock(&vm_page_bucket_lock);
    if (bucket->pages) {
        vm_page_t *mp = &bucket->pages;
        register vm_page_t m = *mp;
        do {
            if (m->object == object && m->offset == offset) {
                /*
                 * Remove page from bucket and from object,
                 * and return it to the free list.
                 */
                *mp = m->next;
                queue_remove(&object->memq, m, vm_page_t,
                         listq);
                m->tabled = FALSE;
                object->resident_page_count--;

                /*
                 * Return page to the free list.
                 * Note the page is not tabled now, so this
                 * won't self-deadlock on the bucket lock.
                 */

                vm_page_free(m);
                break;
            }
            mp = &m->next;
        } while (m = *mp);
        mem->next = bucket->pages;
    } else {
        mem->next = VM_PAGE_NULL;
    }
    bucket->pages = mem;
    simple_unlock(&vm_page_bucket_lock);

    /*
     *  Now link into the object's list of backed pages.
     */

    queue_enter(&object->memq, mem, vm_page_t, listq);
    mem->tabled = TRUE;

    /*
     *  And show that the object has one more resident
     *  page.
     */

    object->resident_page_count++;
}
/*
 *  vm_page_remove:     [ internal use only ]
 *
 *  Removes the given mem entry from the object/offset-page
 *  table and the object page list.
 *
 *  The object and page must be locked.
 */
void
vm_page_remove(
    register vm_page_t  mem)
{
    register vm_page_bucket_t   *bucket;
    register vm_page_t          this;

    XPR(XPR_VM_PAGE,
        "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
        (integer_t)mem->object, (integer_t)mem->offset,
        (integer_t)mem, 0,0);

    assert(!mem->cleaning);

    /*
     *  Remove from the object_object/offset hash table
     */

    bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
    simple_lock(&vm_page_bucket_lock);
    if ((this = bucket->pages) == mem) {
        /* optimize for common case */

        bucket->pages = mem->next;
    } else {
        register vm_page_t  *prev;

        for (prev = &this->next;
             (this = *prev) != mem;
             prev = &this->next)
            continue;
        *prev = this->next;
    }
#if MACH_PAGE_HASH_STATS
    bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
    simple_unlock(&vm_page_bucket_lock);

    /*
     *  Now remove from the object's list of backed pages.
     */

    queue_remove(&mem->object->memq, mem, vm_page_t, listq);

    /*
     *  And show that the object has one fewer resident
     *  page.
     */

    mem->object->resident_page_count--;

    mem->tabled = FALSE;
    mem->object = VM_OBJECT_NULL;
    mem->offset = 0;
}
/*
 *  vm_page_lookup:
 *
 *  Returns the page associated with the object/offset
 *  pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *  The object must be locked.  No side effects.
 */
vm_page_t
vm_page_lookup(
    register vm_object_t            object,
    register vm_object_offset_t     offset)
{
    register vm_page_t  mem;
    register vm_page_bucket_t *bucket;

    /*
     *  Search the hash table for this object/offset pair
     */

    bucket = &vm_page_buckets[vm_page_hash(object, offset)];

    simple_lock(&vm_page_bucket_lock);
    for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
        if ((mem->object == object) && (mem->offset == offset))
            break;
    }
    simple_unlock(&vm_page_bucket_lock);
    return(mem);
}
/*
 *  vm_page_rename:
 *
 *  Move the given memory entry from its
 *  current object to the specified target object/offset.
 *
 *  The object must be locked.
 */
void
vm_page_rename(
    register vm_page_t      mem,
    register vm_object_t    new_object,
    vm_object_offset_t      new_offset)
{
    assert(mem->object != new_object);
    /*
     *  Changes to mem->object require the page lock because
     *  the pageout daemon uses that lock to get the object.
     */

    XPR(XPR_VM_PAGE,
        "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
        (integer_t)new_object, (integer_t)new_offset,
        (integer_t)mem, 0,0);

    vm_page_lock_queues();
    vm_page_remove(mem);
    vm_page_insert(mem, new_object, new_offset);
    vm_page_unlock_queues();
}
/*
 *  vm_page_init:
 *
 *  Initialize the fields in a new page.
 *  This takes a structure with random values and initializes it
 *  so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
    vm_page_t   mem,
    vm_offset_t phys_addr)
{
    *mem = vm_page_template;
    mem->phys_addr = phys_addr;
}
/*
 *  vm_page_grab_fictitious:
 *
 *  Remove a fictitious page from the free list.
 *  Returns VM_PAGE_NULL if there are no free pages.
 */
int c_vm_page_grab_fictitious = 0;
int c_vm_page_release_fictitious = 0;
int c_vm_page_more_fictitious = 0;

vm_page_t
vm_page_grab_fictitious(void)
{
    register vm_page_t m;

    m = (vm_page_t)zget(vm_page_zone);
    if (m) {
        vm_page_init(m, vm_page_fictitious_addr);
        m->fictitious = TRUE;
    }

    c_vm_page_grab_fictitious++;
    return m;
}
/*
 *  vm_page_release_fictitious:
 *
 *  Release a fictitious page to the free list.
 */
void
vm_page_release_fictitious(
    register vm_page_t m)
{
    assert(m->fictitious);
    assert(m->phys_addr == vm_page_fictitious_addr);

    c_vm_page_release_fictitious++;

    if (m->free)
        panic("vm_page_release_fictitious");
    m->free = TRUE;
    zfree(vm_page_zone, (vm_offset_t)m);
}
/*
 *  vm_page_more_fictitious:
 *
 *  Add more fictitious pages to the free list.
 *  Allowed to block. This routine is way intimate
 *  with the zones code, for several reasons:
 *  1. we need to carve some page structures out of physical
 *     memory before zones work, so they _cannot_ come from
 *     the zone map.
 *  2. the zone needs to be collectable in order to prevent
 *     growth without bound. These structures are used by
 *     the device pager (by the hundreds and thousands), as
 *     private pages for pageout, and as blocking pages for
 *     pagein. Temporary bursts in demand should not result in
 *     permanent allocation of a resource.
 *  3. To smooth allocation humps, we allocate single pages
 *     with kernel_memory_allocate(), and cram them into the
 *     zone. This also allows us to initialize the vm_page_t's
 *     on the way into the zone, so that zget() always returns
 *     an initialized structure. The zone free element pointer
 *     and the free page pointer are both the first item in the
 *     vm_page_t.
 *  4. By having the pages in the zone pre-initialized, we need
 *     not keep 2 levels of lists. The garbage collector simply
 *     scans our list, and reduces physical memory usage as it
 *     sees fit.
 */
void vm_page_more_fictitious(void)
{
    extern vm_map_t zone_map;
    register vm_page_t m;
    vm_offset_t addr;
    kern_return_t retval;
    int i;

    c_vm_page_more_fictitious++;

    /* this may free up some fictitious pages */
    cleanup_limbo_queue();

    /*
     * Allocate a single page from the zone_map. Do not wait if no physical
     * pages are immediately available, and do not zero the space. We need
     * our own blocking lock here to prevent having multiple,
     * simultaneous requests from piling up on the zone_map lock. Exactly
     * one (of our) threads should be potentially waiting on the map lock.
     * If winner is not vm-privileged, then the page allocation will fail,
     * and it will temporarily block here in the vm_page_wait().
     */
    mutex_lock(&vm_page_alloc_lock);
    /*
     * If another thread allocated space, just bail out now.
     */
    if (zone_free_count(vm_page_zone) > 5) {
        /*
         * The number "5" is a small number that is larger than the
         * number of fictitious pages that any single caller will
         * attempt to allocate. Otherwise, a thread will attempt to
         * acquire a fictitious page (vm_page_grab_fictitious), fail,
         * release all of the resources and locks already acquired,
         * and then call this routine. This routine finds the pages
         * that the caller released, so fails to allocate new space.
         * The process repeats infinitely. The largest known number
         * of fictitious pages required in this manner is 2. 5 is
         * simply a somewhat larger number.
         */
        mutex_unlock(&vm_page_alloc_lock);
        return;
    }

    if ((retval = kernel_memory_allocate(zone_map,
            &addr, PAGE_SIZE, VM_PROT_ALL,
            KMA_KOBJECT|KMA_NOPAGEWAIT)) != KERN_SUCCESS) {
        /*
         * No page was available. Tell the pageout daemon, drop the
         * lock to give another thread a chance at it, and
         * wait for the pageout daemon to make progress.
         */
        mutex_unlock(&vm_page_alloc_lock);
        vm_page_wait(THREAD_UNINT);
        return;
    }

    /*
     * Initialize as many vm_page_t's as will fit on this page. This
     * depends on the zone code disturbing ONLY the first item of
     * each zone element.
     */
    m = (vm_page_t)addr;
    for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
        vm_page_init(m, vm_page_fictitious_addr);
        m->fictitious = TRUE;
        m++;
    }
    zcram(vm_page_zone, addr, PAGE_SIZE);
    mutex_unlock(&vm_page_alloc_lock);
}
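/*
 *  Illustrative sketch (not part of the original source): the routine
 *  above relies on carving one page into equal-sized elements and
 *  threading a free list through the first word of each element, the
 *  same layout trick a zone allocator uses.  A hypothetical user-space
 *  miniature of that idea:
 */
#if 0   /* example only */
#include <assert.h>
#include <stdlib.h>

struct ex_elem {
    struct ex_elem  *next;      /* first field doubles as the free-list link */
    int             payload;
};

static struct ex_elem *ex_free_list;

static void ex_cram_page(void *page, size_t page_size)
{
    size_t n = page_size / sizeof (struct ex_elem);
    struct ex_elem *e = (struct ex_elem *)page;
    size_t i;

    for (i = 0; i < n; i++, e++) {
        e->next = ex_free_list;     /* push each element onto the free list */
        ex_free_list = e;
    }
}

static void ex_cram_demo(void)
{
    void *page = malloc(4096);

    assert(page != NULL);
    ex_cram_page(page, 4096);
    assert(ex_free_list != NULL);   /* elements now available via ex_free_list */
    free(page);
}
#endif  /* example only */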
/*
 *  vm_page_convert:
 *
 *  Attempt to convert a fictitious page into a real page.
 */
boolean_t
vm_page_convert(
    register vm_page_t m)
{
    register vm_page_t real_m;

    assert(m->fictitious);

    real_m = vm_page_grab();
    if (real_m == VM_PAGE_NULL)
        return FALSE;

    m->phys_addr = real_m->phys_addr;
    m->fictitious = FALSE;

    vm_page_lock_queues();
    if (m->active)
        vm_page_active_count++;
    else if (m->inactive)
        vm_page_inactive_count++;
    vm_page_unlock_queues();

    real_m->phys_addr = vm_page_fictitious_addr;
    real_m->fictitious = TRUE;

    vm_page_release_fictitious(real_m);
    return TRUE;
}
/*
 *  vm_pool_low():
 *
 *  Return true if it is not likely that a non-vm_privileged thread
 *  can get memory without blocking.  Advisory only, since the
 *  situation may change under us.
 */
int
vm_pool_low(void)
{
    /* No locking, at worst we will fib. */
    return( vm_page_free_count < vm_page_free_reserved );
}
/*
 *  vm_page_grab:
 *
 *  Remove a page from the free list.
 *  Returns VM_PAGE_NULL if the free list is too small.
 */

unsigned long   vm_page_grab_count = 0; /* measure demand */

vm_page_t
vm_page_grab(void)
{
    register vm_page_t  mem;

    mutex_lock(&vm_page_queue_free_lock);
    vm_page_grab_count++;

    /*
     *  Optionally produce warnings if the wire or gobble
     *  counts exceed some threshold.
     */
    if (vm_page_wire_count_warning > 0
        && vm_page_wire_count >= vm_page_wire_count_warning) {
        printf("mk: vm_page_grab(): high wired page count of %d\n",
            vm_page_wire_count);
        assert(vm_page_wire_count < vm_page_wire_count_warning);
    }
    if (vm_page_gobble_count_warning > 0
        && vm_page_gobble_count >= vm_page_gobble_count_warning) {
        printf("mk: vm_page_grab(): high gobbled page count of %d\n",
            vm_page_gobble_count);
        assert(vm_page_gobble_count < vm_page_gobble_count_warning);
    }

    /*
     *  Only let privileged threads (involved in pageout)
     *  dip into the reserved pool.
     */

    if ((vm_page_free_count < vm_page_free_reserved) &&
        !current_thread()->vm_privilege) {
        mutex_unlock(&vm_page_queue_free_lock);
        mem = VM_PAGE_NULL;
        goto wakeup_pageout;
    }

    while (vm_page_queue_free == VM_PAGE_NULL) {
        printf("vm_page_grab: no free pages, trouble expected...\n");
        mutex_unlock(&vm_page_queue_free_lock);
        vm_page_wait(THREAD_UNINT);
        mutex_lock(&vm_page_queue_free_lock);
    }

    if (--vm_page_free_count < vm_page_free_count_minimum)
        vm_page_free_count_minimum = vm_page_free_count;
    mem = vm_page_queue_free;
    vm_page_queue_free = (vm_page_t) mem->pageq.next;
    mutex_unlock(&vm_page_queue_free_lock);

    /*
     *  Decide if we should poke the pageout daemon.
     *  We do this if the free count is less than the low
     *  water mark, or if the free count is less than the high
     *  water mark (but above the low water mark) and the inactive
     *  count is less than its target.
     *
     *  We don't have the counts locked ... if they change a little,
     *  it doesn't really matter.
     */

wakeup_pageout:
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         (vm_page_inactive_count < vm_page_inactive_target)))
        thread_wakeup((event_t) &vm_page_free_wanted);

//  dbgLog(mem->phys_addr, vm_page_free_count, vm_page_wire_count, 4);  /* (TEST/DEBUG) */

    return mem;
}
/*
 *  vm_page_release:
 *
 *  Return a page to the free list.
 */
void
vm_page_release(
    register vm_page_t  mem)
{
    assert(!mem->private && !mem->fictitious);

//  dbgLog(mem->phys_addr, vm_page_free_count, vm_page_wire_count, 5);  /* (TEST/DEBUG) */

    mutex_lock(&vm_page_queue_free_lock);
    if (mem->free)
        panic("vm_page_release");
    mem->free = TRUE;
    mem->pageq.next = (queue_entry_t) vm_page_queue_free;
    vm_page_queue_free = mem;
    vm_page_free_count++;

    /*
     *  Check if we should wake up someone waiting for a page.
     *  But don't bother waking them unless they can allocate.
     *
     *  We wakeup only one thread, to prevent starvation.
     *  Because the scheduling system handles wait queues FIFO,
     *  if we wakeup all waiting threads, one greedy thread
     *  can starve multiple niceguy threads.  When the threads
     *  all wakeup, the greedy thread runs first, grabs the page,
     *  and waits for another page.  It will be the first to run
     *  when the next page is freed.
     *
     *  However, there is a slight danger here.
     *  The thread we wake might not use the free page.
     *  Then the other threads could wait indefinitely
     *  while the page goes unused.  To forestall this,
     *  the pageout daemon will keep making free pages
     *  as long as vm_page_free_wanted is non-zero.
     */

    if ((vm_page_free_wanted > 0) &&
        (vm_page_free_count >= vm_page_free_reserved)) {
        vm_page_free_wanted--;
        thread_wakeup_one((event_t) &vm_page_free_count);
    }

    mutex_unlock(&vm_page_queue_free_lock);
}
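/*
 *  Illustrative sketch (not part of the original source): the
 *  wake-only-one policy above corresponds to signalling a single
 *  waiter rather than broadcasting to all of them.  A hypothetical
 *  pthread analogue of the producer side:
 */
#if 0   /* example only */
#include <pthread.h>

static pthread_mutex_t  ex_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t   ex_page_freed = PTHREAD_COND_INITIALIZER;
static int              ex_free_count;
static int              ex_waiters;

static void ex_release_one_page(void)
{
    pthread_mutex_lock(&ex_lock);
    ex_free_count++;
    if (ex_waiters > 0) {
        ex_waiters--;
        /* wake exactly one waiter; broadcasting would wake every
         * waiter only for the first runner to grab the page while
         * the rest go back to sleep */
        pthread_cond_signal(&ex_page_freed);
    }
    pthread_mutex_unlock(&ex_lock);
}
#endif  /* example only */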
/*
 *  vm_page_release_limbo:
 *
 *  Release a page to the limbo list.
 *  Put real pages at the head of the queue, fictitious at the tail.
 *  Page queues must be locked.
 */
void
vm_page_release_limbo(
    register vm_page_t  m)
{
    vm_page_limbo_count++;
    if (m->fictitious) {
        queue_enter(&vm_page_queue_limbo, m, vm_page_t, pageq);
    } else {
        vm_page_limbo_real_count++;
        queue_enter_first(&vm_page_queue_limbo, m, vm_page_t, pageq);
    }
}
/*
 *  vm_page_limbo_exchange:
 *
 *  Exchange a real page in limbo (limbo_m) with a fictitious page (new_m).
 *  The end result is that limbo_m is fictitious and still in limbo, and new_m
 *  is the real page.  The prep and pin counts remain with the page in limbo
 *  although they will be briefly cleared by vm_page_init.  This is OK since
 *  there will be no interrupt-level interactions (the page is in limbo) and
 *  vm_page_unprep must lock the page queues before changing the prep count.
 *
 *  Page queues must be locked, and limbo_m must have been removed from its
 *  object.
 */
void
vm_page_limbo_exchange(
    register vm_page_t  limbo_m,
    register vm_page_t  new_m)
{
    assert(limbo_m->limbo && !limbo_m->fictitious);
    assert(!limbo_m->tabled);
    assert(new_m->fictitious);

    *new_m = *limbo_m;
    vm_page_init(limbo_m, vm_page_fictitious_addr);

    limbo_m->fictitious = TRUE;
    limbo_m->limbo = TRUE;
    new_m->limbo = FALSE;

    limbo_m->prep_pin_count = new_m->prep_pin_count;
    new_m->prep_pin_count = 0;
}
/*
 *  vm_page_wait:
 *
 *  Wait for a page to become available.
 *  If there are plenty of free pages, then we don't sleep.
 *
 *  Returns:
 *      TRUE:  There may be another page, try again
 *      FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
    int interruptible)
{
    /*
     *  We can't use vm_page_free_reserved to make this
     *  determination.  Consider: some thread might
     *  need to allocate two pages.  The first allocation
     *  succeeds, the second fails.  After the first page is freed,
     *  a call to vm_page_wait must really block.
     */
    kern_return_t wait_result;

    mutex_lock(&vm_page_queue_free_lock);
    if (vm_page_free_count < vm_page_free_target) {
        if (vm_page_free_wanted++ == 0)
            thread_wakeup((event_t)&vm_page_free_wanted);
        assert_wait((event_t)&vm_page_free_count, interruptible);
        mutex_unlock(&vm_page_queue_free_lock);
        counter(c_vm_page_wait_block++);
        wait_result = thread_block((void (*)(void))0);
        return(wait_result == THREAD_AWAKENED);
    } else {
        mutex_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
}
/*
 *  vm_page_alloc:
 *
 *  Allocate and return a memory cell associated
 *  with this VM object/offset pair.
 *
 *  Object must be locked.
 */
vm_page_t
vm_page_alloc(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    mem = vm_page_grab();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}
= 0; /* debugging */
1389 int c_limbo_convert
= 0; /* debugging */
1390 counter(unsigned int c_laundry_pages_freed
= 0;)
1392 int vm_pagein_cluster_unused
= 0;
1393 boolean_t vm_page_free_verify
= FALSE
;
/*
 *  vm_page_free:
 *
 *  Returns the given page to the free list,
 *  disassociating it with any VM object.
 *
 *  Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
    register vm_page_t  mem)
{
    vm_object_t object = mem->object;

    assert(!mem->cleaning);
    assert(!mem->pageout);
    assert(!vm_page_free_verify || pmap_verify_free(mem->phys_addr));

    if (mem->tabled)
        vm_page_remove(mem);    /* clears tabled, object, offset */
    VM_PAGE_QUEUES_REMOVE(mem); /* clears active or inactive */

    if (mem->clustered) {
        mem->clustered = FALSE;
        vm_pagein_cluster_unused++;
    }

    if (mem->wire_count) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        mem->wire_count = 0;
        assert(!mem->gobbled);
    } else if (mem->gobbled) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
    }
    mem->gobbled = FALSE;

    if (mem->laundry) {
        extern int vm_page_laundry_min;
        vm_page_laundry_count--;
        mem->laundry = FALSE;   /* laundry is now clear */
        counter(++c_laundry_pages_freed);
        if (vm_page_laundry_count < vm_page_laundry_min) {
            vm_page_laundry_min = 0;
            thread_wakeup((event_t) &vm_page_laundry_count);
        }
    }

    mem->discard_request = FALSE;

    PAGE_WAKEUP(mem);       /* clears wanted */

    if (mem->absent)
        vm_object_absent_release(object);

    if (mem->limbo) {
        /*
         * The pageout daemon put this page into limbo and then freed
         * it.  The page has already been removed from the object and
         * queues, so any attempt to look it up will fail.  Put it
         * on the limbo queue; the pageout daemon will convert it to a
         * fictitious page and/or free the real one later.
         */
        /* assert that it came from pageout daemon (how?) */
        assert(!mem->fictitious && !mem->absent);
        c_limbo_page_free++;
        vm_page_release_limbo(mem);
        return;
    }
    assert(mem->prep_pin_count == 0);

    /* Some of these may be unnecessary */
    mem->unlock_request = 0;
    mem->absent = FALSE;
    mem->precious = FALSE;
    mem->reference = FALSE;

    mem->page_error = KERN_SUCCESS;

    if (mem->private) {
        mem->private = FALSE;
        mem->fictitious = TRUE;
        mem->phys_addr = vm_page_fictitious_addr;
    }
    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_init(mem, mem->phys_addr);
        vm_page_release(mem);
    }
}
/*
 *  vm_page_wire:
 *
 *  Mark this page as wired down by yet
 *  another map, removing it from paging queues
 *  as necessary.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_wire(
    register vm_page_t  mem)
{

//  dbgLog(current_act(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */

    if (mem->wire_count == 0) {
        VM_PAGE_QUEUES_REMOVE(mem);
        if (!mem->private && !mem->fictitious && !mem->gobbled)
            vm_page_wire_count++;
        if (mem->gobbled)
            vm_page_gobble_count--;
        mem->gobbled = FALSE;
    }
    assert(!mem->gobbled);
    mem->wire_count++;
}
/*
 *  vm_page_gobble:
 *
 *  Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *  Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
    register vm_page_t  mem)
{
    vm_page_lock_queues();

    assert(!mem->gobbled);
    assert(mem->wire_count == 0);

    if (!mem->gobbled && mem->wire_count == 0) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count++;
    }
    vm_page_gobble_count++;
    mem->gobbled = TRUE;
    vm_page_unlock_queues();
}
/*
 *  vm_page_unwire:
 *
 *  Release one wiring of this page, potentially
 *  enabling it to be paged again.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
    register vm_page_t  mem)
{

//  dbgLog(current_act(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */

    assert(mem->wire_count > 0);

    if (--mem->wire_count == 0) {
        assert(!mem->private && !mem->fictitious);
        vm_page_wire_count--;
        queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
        vm_page_active_count++;
        mem->active = TRUE;
        mem->reference = TRUE;
    }
}
/*
 *  vm_page_deactivate:
 *
 *  Returns the given page to the inactive list,
 *  indicating that no physical maps have access
 *  to this page.  [Used by the physical mapping system.]
 *
 *  The page queues must be locked.
 */
void
vm_page_deactivate(
    register vm_page_t  m)
{

//  dbgLog(m->phys_addr, vm_page_free_count, vm_page_wire_count, 6);    /* (TEST/DEBUG) */

    /*
     *  This page is no longer very interesting.  If it was
     *  interesting (active or inactive/referenced), then we
     *  clear the reference bit and (re)enter it in the
     *  inactive queue.  Note wired pages should not have
     *  their reference bit cleared.
     */
    if (m->gobbled) {       /* can this happen? */
        assert(m->wire_count == 0);
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    if (m->private || (m->wire_count != 0))
        return;
    if (m->active || (m->inactive && m->reference)) {
        if (!m->fictitious && !m->absent)
            pmap_clear_reference(m->phys_addr);
        m->reference = FALSE;
        VM_PAGE_QUEUES_REMOVE(m);
    }
    if (m->wire_count == 0 && !m->inactive) {
        queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
        m->inactive = TRUE;
        if (!m->fictitious)
            vm_page_inactive_count++;
    }
}
/*
 *  vm_page_activate:
 *
 *  Put the specified page on the active list (if appropriate).
 *
 *  The page queues must be locked.
 */
void
vm_page_activate(
    register vm_page_t  m)
{
    if (m->gobbled) {
        assert(m->wire_count == 0);
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    if (m->private)
        return;

    if (m->inactive) {
        queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
        if (!m->fictitious)
            vm_page_inactive_count--;
        m->inactive = FALSE;
    }
    if (m->wire_count == 0) {
        if (m->active)
            panic("vm_page_activate: already active");

        queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
        m->active = TRUE;
        m->reference = TRUE;
        if (!m->fictitious)
            vm_page_active_count++;
    }
}
/*
 *  vm_page_part_zero_fill:
 *
 *  Zero-fill a part of the page.
 */
void
vm_page_part_zero_fill(
    vm_page_t   m,
    vm_offset_t m_pa,
    vm_size_t   len)
{
    vm_page_t   tmp;

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
    pmap_zero_part_page(m->phys_addr, m_pa, len);
#else
    while (1) {
        tmp = vm_page_grab();
        if (tmp == VM_PAGE_NULL) {
            vm_page_wait(THREAD_UNINT);
            continue;
        }
        break;
    }

    vm_page_zero_fill(tmp);
    if (m_pa != 0) {
        vm_page_part_copy(m, 0, tmp, 0, m_pa);
    }
    if ((m_pa + len) < PAGE_SIZE) {
        vm_page_part_copy(m, m_pa + len, tmp,
                m_pa + len, PAGE_SIZE - (m_pa + len));
    }
    vm_page_copy(tmp, m);
    vm_page_lock_queues();
    vm_page_free(tmp);
    vm_page_unlock_queues();
#endif
}
/*
 *  vm_page_zero_fill:
 *
 *  Zero-fill the specified page.
 */
void
vm_page_zero_fill(
    vm_page_t   m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
        (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);

    pmap_zero_page(m->phys_addr);
}
/*
 *  vm_page_part_copy:
 *
 *  copy part of one page to another
 */
void
vm_page_part_copy(
    vm_page_t   src_m,
    vm_offset_t src_pa,
    vm_page_t   dst_m,
    vm_offset_t dst_pa,
    vm_size_t   len)
{
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dst_m);

    pmap_copy_part_page(src_m->phys_addr, src_pa,
            dst_m->phys_addr, dst_pa, len);
}
/*
 *  vm_page_copy:
 *
 *  Copy one page to another
 */
void
vm_page_copy(
    vm_page_t   src_m,
    vm_page_t   dest_m)
{
    XPR(XPR_VM_PAGE,
    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
    (integer_t)src_m->object, src_m->offset,
    (integer_t)dest_m->object, dest_m->offset,
    0);

    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dest_m);

    pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
}
/*
 *  Limbo pages are placed on the limbo queue to await their prep count
 *  going to zero.  A page is put into limbo by the pageout daemon.  If the
 *  page is real, then the pageout daemon did not need to page out the page,
 *  it just freed it.  When the prep_pin_count is zero the page can be freed.
 *  Real pages with a non-zero prep count are converted to fictitious pages
 *  so that the memory can be reclaimed; the fictitious page will remain on
 *  the limbo queue until its prep count reaches zero.
 *
 *  cleanup_limbo_queue is called by vm_page_more_fictitious and the pageout
 *  daemon since it can free both real and fictitious pages.
 *  It returns the number of fictitious pages freed.
 */
cleanup_limbo_queue(void)
{
    register vm_page_t  free_m, m;
    vm_offset_t         phys_addr;

    vm_page_lock_queues();
    assert(vm_page_limbo_count >= vm_page_limbo_real_count);

    /*
     *  first free up all pages with prep/pin counts of zero.  This
     *  may free both real and fictitious pages, which may be needed
     *  later to convert real ones.
     */
    m = (vm_page_t)queue_first(&vm_page_queue_limbo);
    while (!queue_end(&vm_page_queue_limbo, (queue_entry_t)m)) {
        if (m->prep_pin_count == 0) {
            free_m = m;
            m = (vm_page_t)queue_next(&m->pageq);
            queue_remove(&vm_page_queue_limbo, free_m, vm_page_t,
                     pageq);
            vm_page_limbo_count--;
            if (!free_m->fictitious)
                vm_page_limbo_real_count--;
            free_m->limbo = FALSE;
            vm_page_free(free_m);
            assert(vm_page_limbo_count >= 0);
            assert(vm_page_limbo_real_count >= 0);
        } else {
            m = (vm_page_t)queue_next(&m->pageq);
        }
    }

    /*
     *  now convert any remaining real pages to fictitious and free the
     *  real ones.
     */
    while (vm_page_limbo_real_count > 0) {
        queue_remove_first(&vm_page_queue_limbo, m, vm_page_t, pageq);
        assert(!m->fictitious);

        /*
         *  Try to get a fictitious page.  If impossible,
         *  requeue the real one and give up.
         */
        free_m = vm_page_grab_fictitious();
        if (free_m == VM_PAGE_NULL) {
            queue_enter_first(&vm_page_queue_limbo, m, vm_page_t,
                      pageq);
            break;
        }
        vm_page_limbo_exchange(m, free_m);
        assert(m->limbo && m->fictitious);
        assert(!free_m->limbo && !free_m->fictitious);
        queue_enter(&vm_page_queue_limbo, m, vm_page_t, pageq);
        vm_page_free(free_m);
        vm_page_limbo_real_count--;
    }

    vm_page_unlock_queues();
}
/*
 *  vm_page_prep:
 *
 *  Increment prep_count on a page.
 *  Must be called in thread context.  Page must not disappear: object
 *  lock must be held.
 */
kern_return_t
vm_page_prep(
    register vm_page_t  m)
{
    kern_return_t retval = KERN_SUCCESS;

    assert(m != VM_PAGE_NULL);
    vm_page_lock_queues();
    if (!m->busy && !m->error && !m->fictitious && !m->absent) {
        if (m->prep_pin_count != 0) {
            vm_page_pin_lock();
            m->prep_count++;
            vm_page_pin_unlock();
        } else {
            m->prep_count++;
        }
        assert(m->prep_count != 0); /* check for wraparound */
    } else {
        retval = KERN_FAILURE;
    }
    vm_page_unlock_queues();
    return retval;
}
/*
 *  vm_page_pin:
 *
 *  Pin a page (increment pin count).
 *  Must have been previously prepped.
 *
 *  MUST BE CALLED AT SPLVM.
 *
 *  May be called from thread or interrupt context.
 *  If page is in "limbo" it cannot be pinned.
 */
kern_return_t
vm_page_pin(
    register vm_page_t  m)
{
    kern_return_t retval = KERN_SUCCESS;

    assert(m != VM_PAGE_NULL);
    vm_page_pin_lock();
    if (m->limbo || m->prep_count == 0) {
        retval = KERN_FAILURE;
    } else {
        assert(!m->fictitious);
        if (m->pin_count == 0)
            vm_page_pin_count++;
        m->pin_count++;
    }
    vm_page_pin_unlock();
    return retval;
}
/*
 *  vm_page_unprep:
 *
 *  Unprep a page (decrement prep count).
 *  Must have been previously prepped.
 *  Called to decrement prep count after an attempt to pin failed.
 *  Must be called from thread context.
 */
kern_return_t
vm_page_unprep(
    register vm_page_t  m)
{
    kern_return_t retval = KERN_SUCCESS;

    assert(m != VM_PAGE_NULL);
    vm_page_lock_queues();
    vm_page_pin_lock();
    assert(m->prep_count != 0);
    if (m->prep_count == 0)
        retval = KERN_FAILURE;  /* shouldn't happen */
    else
        m->prep_count--;
    vm_page_pin_unlock();
    vm_page_unlock_queues();
    return retval;
}
/*
 *  vm_page_unpin:
 *
 *  Unpin a page:  decrement pin AND prep counts.
 *  Must have been previously prepped AND pinned.
 *
 *  MUST BE CALLED AT SPLVM.
 *
 *  May be called from thread or interrupt context.
 */
kern_return_t
vm_page_unpin(
    register vm_page_t  m)
{
    kern_return_t retval = KERN_SUCCESS;

    assert(m != VM_PAGE_NULL);
    vm_page_pin_lock();
    assert(m->prep_count != 0 && m->pin_count != 0);
    assert(m->prep_count >= m->pin_count);
    assert(!m->limbo && !m->fictitious);
    if (m->prep_count != 0 && m->pin_count != 0) {
        m->prep_count--;
        m->pin_count--;
        if (m->pin_count == 0)
            vm_page_pin_count--;
    } else {
        retval = KERN_FAILURE;  /* shouldn't happen */
    }
    vm_page_pin_unlock();
    return retval;
}
/*
 *  Currently, this is a primitive allocator that grabs
 *  free pages from the system, sorts them by physical
 *  address, then searches for a region large enough to
 *  satisfy the user's request.
 *
 *  Additional levels of effort:
 *      + steal clean active/inactive pages
 *      + force pageouts of dirty pages
 *      + maintain a map of available physical
 *        memory
 */

#define SET_NEXT_PAGE(m,n)  ((m)->pageq.next = (struct queue_entry *) (n))

#if MACH_ASSERT
int vm_page_verify_contiguous(
    vm_page_t       pages,
    unsigned int    npages);
#endif  /* MACH_ASSERT */

cpm_counter(unsigned int    vpfls_pages_handled = 0;)
cpm_counter(unsigned int    vpfls_head_insertions = 0;)
cpm_counter(unsigned int    vpfls_tail_insertions = 0;)
cpm_counter(unsigned int    vpfls_general_insertions = 0;)
cpm_counter(unsigned int    vpfc_failed = 0;)
cpm_counter(unsigned int    vpfc_satisfied = 0;)
/*
 *  Sort free list by ascending physical address,
 *  using a not-particularly-bright sort algorithm.
 *  Caller holds vm_page_queue_free_lock.
 */
void
vm_page_free_list_sort(void)
{
    vm_page_t   sort_list;
    vm_page_t   sort_list_end;
    vm_page_t   m, m1, *prev, next_m;
#if MACH_ASSERT
    unsigned int    npages;
    int             old_free_count;
    vm_offset_t     addr;
#endif  /* MACH_ASSERT */

#if MACH_ASSERT
    /*
     *  Verify pages in the free list..
     */
    npages = 0;
    for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
        ++npages;
    if (npages != vm_page_free_count)
        panic("vm_sort_free_list:  prelim:  npages %d free_count %d",
              npages, vm_page_free_count);
    old_free_count = vm_page_free_count;
#endif  /* MACH_ASSERT */

    sort_list = sort_list_end = vm_page_queue_free;
    m = NEXT_PAGE(vm_page_queue_free);
    SET_NEXT_PAGE(vm_page_queue_free, VM_PAGE_NULL);
    cpm_counter(vpfls_pages_handled = 0);
    while (m != VM_PAGE_NULL) {
        cpm_counter(++vpfls_pages_handled);
        next_m = NEXT_PAGE(m);
        if (m->phys_addr < sort_list->phys_addr) {
            cpm_counter(++vpfls_head_insertions);
            SET_NEXT_PAGE(m, sort_list);
            sort_list = m;
        } else if (m->phys_addr > sort_list_end->phys_addr) {
            cpm_counter(++vpfls_tail_insertions);
            SET_NEXT_PAGE(sort_list_end, m);
            SET_NEXT_PAGE(m, VM_PAGE_NULL);
            sort_list_end = m;
        } else {
            cpm_counter(++vpfls_general_insertions);
            /* general sorted list insertion */
            prev = &sort_list;
            for (m1 = sort_list; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
                if (m1->phys_addr > m->phys_addr) {
                    if (*prev != m1)
                        panic("vm_sort_free_list: ugh");
                    SET_NEXT_PAGE(m, *prev);
                    *prev = m;
                    break;
                }
                prev = (vm_page_t *) &m1->pageq.next;
            }
        }
        m = next_m;
    }

#if MACH_ASSERT
    /*
     *  Verify that pages are sorted into ascending order.
     */
    for (m = sort_list, npages = 0; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
        if (m != sort_list &&
            m->phys_addr <= addr) {
            printf("m 0x%x addr 0x%x\n", m, addr);
            panic("vm_sort_free_list");
        }
        addr = m->phys_addr;
        ++npages;
    }
    if (old_free_count != vm_page_free_count)
        panic("vm_sort_free_list:  old_free %d free_count %d",
              old_free_count, vm_page_free_count);
    if (npages != vm_page_free_count)
        panic("vm_sort_free_list:  npages %d free_count %d",
              npages, vm_page_free_count);
#endif  /* MACH_ASSERT */

    vm_page_queue_free = sort_list;
}
#if MACH_ASSERT
/*
 *  Check that the list of pages is ordered by
 *  ascending physical address and has no holes.
 */
int
vm_page_verify_contiguous(
    vm_page_t       pages,
    unsigned int    npages)
{
    register vm_page_t  m;
    unsigned int        page_count;
    vm_offset_t         prev_addr;

    prev_addr = pages->phys_addr;
    page_count = 1;
    for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
        if (m->phys_addr != prev_addr + page_size) {
            printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n",
                   m, prev_addr, m->phys_addr);
            printf("pages 0x%x page_count %d\n", pages, page_count);
            panic("vm_page_verify_contiguous:  not contiguous!");
        }
        prev_addr = m->phys_addr;
        ++page_count;
    }
    if (page_count != npages) {
        printf("pages 0x%x actual count 0x%x but requested 0x%x\n",
               pages, page_count, npages);
        panic("vm_page_verify_contiguous:  count error");
    }
    return 1;
}
#endif  /* MACH_ASSERT */
/*
 *  Find a region large enough to contain at least npages
 *  of contiguous physical memory.
 *
 *  Requirements:
 *      - Called while holding vm_page_queue_free_lock.
 *      - Doesn't respect vm_page_free_reserved; caller
 *        must not ask for more pages than are legal to grab.
 *
 *  Returns a pointer to a list of gobbled pages or VM_PAGE_NULL.
 */
vm_page_t
vm_page_find_contiguous(
    int npages)
{
    vm_page_t       m, *contig_prev, *prev_ptr;
    vm_offset_t     prev_addr;
    unsigned int    contig_npages;
    vm_page_t       list;

    if (npages < 1)
        return VM_PAGE_NULL;

    prev_addr = vm_page_queue_free->phys_addr - (page_size + 1);
    prev_ptr = &vm_page_queue_free;
    for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {

        if (m->phys_addr != prev_addr + page_size) {
            /*
             *  Whoops!  Pages aren't contiguous.  Start over.
             */
            contig_npages = 0;
            contig_prev = prev_ptr;
        }

        if (++contig_npages == npages) {
            /*
             *  Chop these pages out of the free list.
             *  Mark them all as gobbled.
             */
            list = *contig_prev;
            *contig_prev = NEXT_PAGE(m);
            SET_NEXT_PAGE(m, VM_PAGE_NULL);
            for (m = list; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
                m->free = FALSE;
                m->gobbled = TRUE;
            }
            vm_page_free_count -= npages;
            if (vm_page_free_count < vm_page_free_count_minimum)
                vm_page_free_count_minimum = vm_page_free_count;
            vm_page_wire_count += npages;
            vm_page_gobble_count += npages;
            cpm_counter(++vpfc_satisfied);
            assert(vm_page_verify_contiguous(list, contig_npages));
            return list;
        }

        assert(contig_npages < npages);
        prev_ptr = (vm_page_t *) &m->pageq.next;
        prev_addr = m->phys_addr;
    }
    cpm_counter(++vpfc_failed);
    return VM_PAGE_NULL;
}
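/*
 *  Illustrative sketch (not part of the original source): once the
 *  free list is sorted by physical address, a contiguous region is
 *  just a run of entries whose addresses differ by exactly one page.
 *  A hypothetical array-based version of that scan:
 */
#if 0   /* example only */
#include <stddef.h>

/* addrs[] is sorted ascending; returns the index where a run of npages
 * page-contiguous addresses starts, or -1 if there is none. */
static int ex_find_contiguous(const unsigned long *addrs, size_t count,
                              size_t npages, unsigned long pg_size)
{
    size_t i, run_start = 0, run_len = 0;

    for (i = 0; i < count; i++) {
        if (run_len == 0 || addrs[i] != addrs[i - 1] + pg_size) {
            run_start = i;      /* run broken: start over here */
            run_len = 0;
        }
        if (++run_len == npages)
            return (int)run_start;
    }
    return -1;
}
#endif  /* example only */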
/*
 *  Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
    vm_size_t   size,
    vm_page_t   *list,
    boolean_t   wire)
{
    register vm_page_t  m;
    vm_page_t           *first_contig;
    vm_page_t           free_list, pages;
    unsigned int        npages, n1pages;
    int                 vm_pages_available;

    if (size % page_size != 0)
        return KERN_INVALID_ARGUMENT;

    vm_page_lock_queues();
    mutex_lock(&vm_page_queue_free_lock);

    /*
     *  Should also take active and inactive pages
     *  into account...  One day...
     */
    vm_pages_available = vm_page_free_count - vm_page_free_reserved;

    if (size > vm_pages_available * page_size) {
        mutex_unlock(&vm_page_queue_free_lock);
        return KERN_RESOURCE_SHORTAGE;
    }

    vm_page_free_list_sort();

    npages = size / page_size;

    /*
     *  Obtain a pointer to a subset of the free
     *  list large enough to satisfy the request;
     *  the region will be physically contiguous.
     */
    pages = vm_page_find_contiguous(npages);
    if (pages == VM_PAGE_NULL) {
        mutex_unlock(&vm_page_queue_free_lock);
        vm_page_unlock_queues();
        return KERN_NO_SPACE;
    }

    mutex_unlock(&vm_page_queue_free_lock);

    /*
     *  Walk the returned list, wiring the pages.
     */
    if (wire == TRUE)
        for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
            /*
             *  Essentially inlined vm_page_wire.
             */
            assert(!m->inactive);
            assert(!m->private);
            assert(!m->fictitious);
            assert(m->wire_count == 0);

            m->gobbled = FALSE;
            m->wire_count++;
            --vm_page_gobble_count;
        }
    vm_page_unlock_queues();

    /*
     *  The CPM pages should now be available and
     *  ordered by ascending physical address.
     */
    assert(vm_page_verify_contiguous(pages, npages));

    *list = pages;
    return KERN_SUCCESS;
}
#include <mach_vm_debug.h>
#if MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *  Routine:    vm_page_info
 *  Purpose:
 *      Return information about the global VP table.
 *      Fills the buffer with as much information as possible
 *      and returns the desired size of the buffer.
 *  Conditions:
 *      Nothing locked.  The caller should provide
 *      possibly-pageable memory.
 */

unsigned int
vm_page_info(
    hash_info_bucket_t  *info,
    unsigned int        count)
{
    int i;

    if (vm_page_bucket_count < count)
        count = vm_page_bucket_count;

    for (i = 0; i < count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];
        unsigned int bucket_count = 0;
        vm_page_t m;

        simple_lock(&vm_page_bucket_lock);
        for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
            bucket_count++;
        simple_unlock(&vm_page_bucket_lock);

        /* don't touch pageable memory while holding locks */
        info[i].hib_count = bucket_count;
    }

    return vm_page_bucket_count;
}
#endif  /* MACH_VM_DEBUG */
#include <mach_kdb.h>
#if MACH_KDB

#include <ddb/db_output.h>
#include <vm/vm_print.h>
#define printf  kdbprintf

/*
 *  Routine:    vm_page_print       [exported]
 */
void
vm_page_print(
    vm_page_t   p)
{
    iprintf("page 0x%x\n", p);

    iprintf("object=0x%x", p->object);
    printf(", offset=0x%x", p->offset);
    printf(", wire_count=%d", p->wire_count);
    printf(", prep_count=%d", p->prep_count);
    printf(", pin_count=%d\n", p->pin_count);

    iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sdiscard\n",
        (p->inactive ? "" : "!"),
        (p->active ? "" : "!"),
        (p->gobbled ? "" : "!"),
        (p->laundry ? "" : "!"),
        (p->free ? "" : "!"),
        (p->reference ? "" : "!"),
        (p->discard_request ? "" : "!"));
    iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
        (p->busy ? "" : "!"),
        (p->wanted ? "" : "!"),
        (p->tabled ? "" : "!"),
        (p->fictitious ? "" : "!"),
        (p->private ? "" : "!"),
        (p->precious ? "" : "!"));
    iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
        (p->absent ? "" : "!"),
        (p->error ? "" : "!"),
        (p->dirty ? "" : "!"),
        (p->cleaning ? "" : "!"),
        (p->pageout ? "" : "!"),
        (p->clustered ? "" : "!"));
    iprintf("%slock_supplied, %soverwriting, %srestart, %sunusual, %slimbo\n",
        (p->lock_supplied ? "" : "!"),
        (p->overwriting ? "" : "!"),
        (p->restart ? "" : "!"),
        (p->unusual ? "" : "!"),
        (p->limbo ? "" : "!"));

    iprintf("phys_addr=0x%x", p->phys_addr);
    printf(", page_error=0x%x", p->page_error);
    printf(", page_lock=0x%x", p->page_lock);
    printf(", unlock_request=%d\n", p->unlock_request);
}
#endif  /* MACH_KDB */