/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */

#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <ppc/mappings.h>		/* (BRINGUP) */
#include <pexpert/pexpert.h>		/* (BRINGUP) */

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kern_memorystatus.h>

#include <sys/kdebug.h>

boolean_t	vm_page_free_verify = TRUE;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

struct vm_speculative_age_q	vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

__private_extern__ void		vm_page_init_lck_grp(void);

static void			vm_page_free_prepare(vm_page_t page);

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;

/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical table.]
 */
typedef struct {
	vm_page_t	pages;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;	/* current count */
	int		hi_count;	/* high water mark */
#endif	/* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

#define	BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;
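
/*
 * Illustrative sketch, not part of the original source: with
 * BUCKETS_PER_LOCK == 16, a page's hash bucket index maps to the spin
 * lock protecting it by simple integer division, so sixteen consecutive
 * buckets share one lock.  The hypothetical helper below only
 * demonstrates that arithmetic; the real lookup/insert/remove code
 * indexes vm_page_bucket_locks[] with the same expression inline.
 */
static inline unsigned int
vm_page_bucket_lock_index(unsigned int hash_id)
{
	/* buckets 0..15 share lock 0, buckets 16..31 share lock 1, ... */
	return hash_id / BUCKETS_PER_LOCK;
}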

#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */

/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	functions.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;

/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;

/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
queue_head_t	vm_page_queue_free[MAX_COLORS];
vm_page_t	vm_page_queue_fictitious;
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */

/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t		vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
unsigned int	io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;

/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	them into the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;

/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_zf;	/* inactive memory queue for zero fill */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;	/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;	/* total count of purged pages */

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

ppnum_t		vm_lopage_poolstart = 0;
ppnum_t		vm_lopage_poolend = 0;
int		vm_lopage_poolsize = 0;
uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;

/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;

/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;

/*
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_mask = page_size - 1;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}

/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;
}
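
/*
 * Illustrative sketch, not part of the original source: because the
 * color count is forced to a power of two above, a page's free-queue
 * color can be taken from the low bits of its physical page number with
 * a single mask, which is exactly what the release path does later in
 * this file (color = mem->phys_page & vm_color_mask).  The helper name
 * below is hypothetical.
 */
static inline unsigned int
vm_page_color_of(ppnum_t phys_page)
{
	/* low log2(vm_colors) bits select one of the per-color free queues */
	return (unsigned int)(phys_page & vm_color_mask);
}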


lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
}

void
vm_page_init_local_q()
{
	unsigned int	num_cpus;
	unsigned int	i;
	struct vplq	*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}

/*
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next = VM_PAGE_NULL;

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->list_req_pending = FALSE;
	m->dump_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->zero_fill = FALSE;
	m->__unused_object_bits = 0;

	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
	}

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);
	queue_init(&vm_lopage_queue_free);
	vm_page_queue_fictitious = VM_PAGE_NULL;
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_zf);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();

	/*
	 *	Steal memory for the map and zone subsystems.
	 */

	vm_map_steal_memory();
	zone_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *	        hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to insure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
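
	/*
	 * Illustrative sketch, not part of the original source: the bucket
	 * sizing above simply doubles a count until it reaches the number
	 * of physical pages, yielding the first power of two >= npages,
	 * which is what makes masking with (count - 1) a valid hash
	 * reduction.  The hypothetical helper below restates that
	 * computation; e.g. 100000 pages -> 131072 buckets.
	 */
#if 0	/* sketch only */
	static unsigned int
	first_power_of_two_at_least(unsigned int npages)
	{
		unsigned int count = 1;

		while (count < npages)
			count <<= 1;
		return count;
	}
#endif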

	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->pages = VM_PAGE_NULL;
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved.  At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count;	/* initial value */
	vm_page_free_count_minimum = vm_page_free_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	simple_lock_init(&vm_paging_lock, 0);
}

#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t	size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size to a round multiple.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {
#if defined(__LP64__)
		if (!pmap_next_page_k64(&phys_page))
#else
		if (!pmap_next_page(&phys_page))
#endif
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}
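
/*
 * Illustrative sketch, not part of the original source: the size
 * rounding at the top of pmap_steal_memory() is the usual
 * round-up-to-a-power-of-two-boundary trick, shown here for a
 * pointer-sized boundary to match the expression used above.  The
 * helper name is hypothetical.
 */
static inline vm_size_t
round_up_to_pointer_size(vm_size_t size)
{
	/* add (alignment - 1), then clear the low bits */
	return (size + sizeof (void *) - 1) & ~((vm_size_t)sizeof (void *) - 1);
}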

void
pmap_startup(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	vm_page_t	m;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;
	unsigned int	num_of_lopages = 0;
	unsigned int	last_index;

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;

		vm_page_init(&vm_pages[i], phys_page);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */

	/*
	 * if vm_lopage_poolsize is non-zero, then we need to reserve
	 * a pool of pages whose addresses are less than 4G... this pool
	 * is used by drivers whose hardware can't DMA beyond 32 bits...
	 *
	 * note that I'm assuming that the page list is ascending and
	 * ordered w/r to the physical address
	 */
	for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
		m = &vm_pages[i];

		if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
			panic("couldn't reserve the lopage pool: not enough lo pages\n");

		if (m->phys_page < vm_lopage_poolend)
			panic("couldn't reserve the lopage pool: page list out of order\n");

		vm_lopage_poolend = m->phys_page;

		if (vm_lopage_poolstart == 0)
			vm_lopage_poolstart = m->phys_page;
		else {
			if (m->phys_page < vm_lopage_poolstart)
				panic("couldn't reserve the lopage pool: page list out of order\n");
		}

		if (fill)
			fillPage(m->phys_page, fillval);	/* Fill the page with a known value if requested at boot */

		vm_page_release(m);
	}
	last_index = i;

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = last_index + 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > last_index; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
		vm_page_release(&vm_pages[i - 1]);
	}

	{
		vm_page_t xx, xxo, xxl;
		int j, k, l;

		j = 0;						/* (BRINGUP) */
		xxl = 0;

		for( i = 0; i < vm_colors; i++ ) {
			queue_iterate(&vm_page_queue_free[i],
				      xx,
				      vm_page_t,
				      pageq) {			/* BRINGUP */
				j++;				/* (BRINGUP) */
				if(j > vm_page_free_count) {	/* (BRINGUP) */
					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
				}

				l = vm_page_free_count - j;	/* (BRINGUP) */
				k = 0;				/* (BRINGUP) */

				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
					k++;
					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
					}
				}

				xxl = xx;
			}
		}

		if(j != vm_page_free_count) {			/* (BRINGUP) */
			panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
		}
	}

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */

/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);

	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

	lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
}

/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
	ppnum_t		start,
	ppnum_t		end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious())
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		vm_page_init(m, phys_page);
		vm_page_pages++;
		vm_page_release(m);
	}
}

/*
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
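
/*
 * Illustrative sketch, not part of the original source: the macro above
 * mixes the object pointer and the page-aligned offset with the odd
 * constant vm_page_bucket_hash and then masks the result with
 * vm_page_hash_mask (bucket count minus one).  The hypothetical
 * function below restates the same computation in function form, for
 * readability only; the real code uses the macro.
 */
static inline unsigned int
vm_page_hash_sketch(vm_object_t object, vm_object_offset_t offset)
{
	natural_t h;

	/* multiply the pointer, xor the page index, then reduce by the mask */
	h = (natural_t)((uintptr_t)object * vm_page_bucket_hash) +
	    ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash);
	return h & vm_page_hash_mask;
}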

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
}

void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);

	if (object == vm_submap_object) {
		/* the vm_submap_object is only a placeholder for submaps */
		panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
	}

	vm_object_lock_assert_exclusive(object);

	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
					: LCK_MTX_ASSERT_NOTOWNED);

	if (insert_in_hash == TRUE) {

		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);

		assert(!object->internal || offset < object->size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next = bucket->pages;
		bucket->pages = mem;
#if	MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */

	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	assert(!mem->reusable);

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}
}

/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t	*bucket;
	vm_page_t		found_m = VM_PAGE_NULL;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	vm_object_lock_assert_exclusive(object);

	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->pages) {
		vm_page_t	*mp = &bucket->pages;
		vm_page_t	m = *mp;

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next;

				found_m = m;
				break;
			}
			mp = &m->next;
		} while ((m = *mp));

		mem->next = bucket->pages;
	} else {
		mem->next = VM_PAGE_NULL;
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->pages = mem;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
}

/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t	*bucket;
	vm_page_t		this;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);

	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = bucket->pages) == mem) {
			/* optimize for common case */

			bucket->pages = mem->next;
		} else {
			vm_page_t	*prev;

			for (prev = &this->next;
			     (this = *prev) != mem;
			     prev = &this->next)
				continue;
			*prev = this->next;
		}
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;
	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}

/*
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;

vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t		mem;
	vm_page_bucket_t	*bucket;
	queue_entry_t		qe;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (bucket->pages == VM_PAGE_NULL) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else
		vm_page_lookup_miss++;

	return(mem);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	assert(mem->object != new_object);

	/*
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);

	vm_page_unlock_queues();
}

/*
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page)
{
	*mem = vm_page_template;
	mem->phys_page = phys_page;
}

/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);

vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t	phys_addr)
{
	register vm_page_t m;

	m = (vm_page_t)zget(vm_page_zone);
	if (m) {
		vm_page_init(m, phys_addr);
		m->fictitious = TRUE;
	}

	c_vm_page_grab_fictitious++;
	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}

/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the free list.
 */
void
vm_page_release_fictitious(
	register vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	if (m->free)
		panic("vm_page_release_fictitious");
	m->free = TRUE;

	zfree(vm_page_zone, m);
}

/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the free list.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone. This also allows us to initialize the vm_page_t's
 *	   on the way into the zone, so that zget() always returns
 *	   an initialized structure. The zone free element pointer
 *	   and the free page pointer are both the first item in the
 *	   vm_page_t.
 *	4. By having the pages in the zone pre-initialized, we need
 *	   not keep 2 levels of lists. The garbage collector simply
 *	   scans our list, and reduces physical memory usage as it
 *	   sees fit.
 */

void vm_page_more_fictitious(void)
{
	register vm_page_t m;
	vm_offset_t	addr;
	kern_return_t	retval;
	int		i;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Tell the pageout daemon, drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}
	/*
	 * Initialize as many vm_page_t's as will fit on this page. This
	 * depends on the zone code disturbing ONLY the first item of
	 * each zone element.
	 */
	m = (vm_page_t)addr;
	for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
		vm_page_init(m, vm_page_fictitious_addr);
		m->fictitious = TRUE;
		m++;
	}
	zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
	lck_mtx_unlock(&vm_page_alloc_lock);
}

/*
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}

/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 0;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopage_free_count = 0;
unsigned int	vm_lopage_max_count = 0;
queue_head_t	vm_lopage_queue_free;

vm_page_t
vm_page_grablo(void)
{
	register vm_page_t	mem;
	unsigned int		vm_lopage_alloc_count;

	if (vm_lopage_poolsize == 0)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (! queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);

		assert(!mem->pmapped);
		assert(!mem->wpmapped);

		mem->pageq.next = NULL;
		mem->pageq.prev = NULL;

		vm_lopage_free_count--;
		vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
		if (vm_lopage_alloc_count > vm_lopage_max_count)
			vm_lopage_max_count = vm_lopage_alloc_count;
	} else {
		mem = VM_PAGE_NULL;
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	return (mem);
}

/*
 * first try to grab a page from the per-cpu free list...
 * this must be done while pre-emption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move to the per-cpu queue...
 * return 1 of these pages when done... only wakeup the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */

#define COLOR_GROUPS_TO_STEAL	4
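
/*
 * Illustrative sketch, not part of the original source: the strategy
 * described above is a per-CPU cache with batched refill.  The
 * hypothetical helper below shows the shape of that pattern over a
 * plain singly linked list; it deliberately omits the locking,
 * preemption control, reserved-pool checks and color handling that
 * vm_page_grab() performs.
 */
struct sketch_page {
	struct sketch_page *next;
};

static struct sketch_page *
sketch_grab(struct sketch_page **percpu_list, struct sketch_page **global_list)
{
	struct sketch_page *m = *percpu_list;

	if (m != NULL) {			/* fast path: per-CPU cache hit */
		*percpu_list = m->next;
		m->next = NULL;
		return m;
	}
	/* slow path: take a batch from the global list, keep one, cache the rest */
	m = *global_list;
	if (m != NULL) {
		*global_list = NULL;
		*percpu_list = m->next;
		m->next = NULL;
	}
	return m;
}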

vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;


	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
		mem->pageq.next = NULL;

		enable_preemption();

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(pmap_verify_free(mem->phys_page));
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);

		return mem;
	}
	enable_preemption();


	/*
	 *	Optionally produce warnings if the wire or gobble
	 *	counts exceed some threshold.
	 */
	if (vm_page_wire_count_warning > 0
	    && vm_page_wire_count >= vm_page_wire_count_warning) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
			vm_page_wire_count);
		assert(vm_page_wire_count < vm_page_wire_count_warning);
	}
	if (vm_page_gobble_count_warning > 0
	    && vm_page_gobble_count >= vm_page_gobble_count_warning) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
			vm_page_gobble_count);
		assert(vm_page_gobble_count < vm_page_gobble_count_warning);
	}

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 *	Only let privileged threads (involved in pageout)
	 *	dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;
	}
	else {
		vm_page_t	head;
		vm_page_t	tail;
		unsigned int	pages_to_steal;
		unsigned int	color;

		while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else
			pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;

		if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
			pages_to_steal = (vm_page_free_count - vm_page_free_reserved);

		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		while (pages_to_steal--) {
			if (--vm_page_free_count < vm_page_free_count_minimum)
				vm_page_free_count_minimum = vm_page_free_count;

			while (queue_empty(&vm_page_queue_free[color]))
				color = (color + 1) & vm_color_mask;

			queue_remove_first(&vm_page_queue_free[color],
					   mem,
					   vm_page_t,
					   pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			color = (color + 1) & vm_color_mask;

			if (head == NULL)
				head = mem;
			else
				tail->pageq.next = (queue_t)mem;
			tail = mem;

			mem->pageq.prev = NULL;
			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
			assert(mem->tabled == FALSE);
			assert(mem->object == VM_OBJECT_NULL);
			assert(!mem->laundry);

			assert(pmap_verify_free(mem->phys_page));
			assert(!mem->encrypted);
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
		}
		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		mem->pageq.next = NULL;

		lck_mtx_unlock(&vm_page_queue_free_lock);

		enable_preemption();
	}
	/*
	 *	Decide if we should poke the pageout daemon.
	 *	We do this if the free count is less than the low
	 *	water mark, or if the free count is less than the high
	 *	water mark (but above the low water mark) and the inactive
	 *	count is less than its target.
	 *
	 *	We don't have the counts locked ... if they change a little,
	 *	it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	{
		int	percent_avail;

		/*
		 * Decide if we need to poke the memorystatus notification thread.
		 */
		percent_avail =
			(vm_page_active_count + vm_page_inactive_count +
			 vm_page_speculative_count + vm_page_free_count +
			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
			atop_64(max_mem);

		if (percent_avail <= (kern_memorystatus_level - 5)) {
			kern_memorystatus_level = percent_avail;
			thread_wakeup((event_t)&kern_memorystatus_wakeup);
		}
	}

//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}
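
/*
 * Illustrative sketch, not part of the original source: the
 * memorystatus check above computes the percentage of "available"
 * pages (active + inactive + speculative + free, plus purgeable pages
 * when no default pager is registered) relative to the total page
 * count, and only pokes the notification thread when that percentage
 * has dropped at least 5 points below the recorded level.  The
 * hypothetical helper below restates the percentage arithmetic.
 */
static inline unsigned int
percent_available_sketch(unsigned int avail_pages, unsigned int total_pages)
{
	/* widen before multiplying to avoid 32-bit overflow on large systems */
	return (unsigned int)(((uint64_t)avail_pages * 100) / total_pages);
}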

/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;

	unsigned int	pindex;
	phys_entry	*physent;

	physent = mapping_phys_lookup(mem->phys_page, &pindex);	/* (BRINGUP) */
	if(physent->ppLink & ppN) {					/* (BRINGUP) */
		panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
	}
	physent->ppLink = physent->ppLink | ppN;			/* (BRINGUP) */

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (mem->free)
		panic("vm_page_release");
	mem->free = TRUE;

	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem,
				  vm_page_t,
				  pageq);
		vm_lopage_free_count++;
	} else {
		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy threads runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */

		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	{
		int	percent_avail;

		/*
		 * Decide if we need to poke the memorystatus notification thread.
		 * Locking is not a big issue, as only a single thread delivers these.
		 */
		percent_avail =
			(vm_page_active_count + vm_page_inactive_count +
			 vm_page_speculative_count + vm_page_free_count +
			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
			atop_64(max_mem);

		if (percent_avail >= (kern_memorystatus_level + 5)) {
			kern_memorystatus_level = percent_avail;
			thread_wakeup((event_t)&kern_memorystatus_wakeup);
		}
	}
}

/*
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);

		return(wait_result == THREAD_AWAKENED);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}

/*
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}
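
/*
 * Illustrative sketch, not part of the original source: callers of the
 * allocation routines above typically loop until a page is available,
 * blocking in VM_PAGE_WAIT() when the free list is depleted.  The
 * hypothetical helper below shows that usage pattern; it assumes the
 * caller holds the object lock exclusively, as vm_page_alloc()
 * requires, and that dropping it across the wait is acceptable.
 */
static vm_page_t
vm_page_alloc_blocking_sketch(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		/* no free pages: drop the object lock and wait for the pageout daemon */
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	}
	return m;
}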

counter(unsigned int c_laundry_pages_freed = 0;)

/*
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	register vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);
	assert(!mem->pageout);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");

	vm_object_lock_assert_exclusive(mem->object);

	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_throttle_up() before removing
		 * the page from its VM object, so that we can find out on
		 * which pageout queue the page is on.
		 */
		vm_pageout_throttle_up(mem);
		counter(++c_laundry_pages_freed);
	}
	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {

		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);

		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
void
vm_page_free_prepare_object(
    vm_page_t	mem,
    boolean_t	remove_from_hash)
{
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    }
    if (mem->tabled)
        vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

    PAGE_WAKEUP(mem);		/* clears wanted */

    if (mem->private) {
        mem->private = FALSE;
        mem->fictitious = TRUE;
        mem->phys_page = vm_page_fictitious_addr;
    }
    if (mem->fictitious) {
        /* Some of these may be unnecessary */
        mem->gobbled = FALSE;
        mem->absent = FALSE;
        mem->precious = FALSE;
        mem->reference = FALSE;
        mem->encrypted = FALSE;
        mem->encrypted_cleaning = FALSE;
        mem->pmapped = FALSE;
        mem->wpmapped = FALSE;
        mem->reusable = FALSE;
    } else {
        if (mem->zero_fill == TRUE)
            VM_ZF_COUNT_DECR();
        vm_page_init(mem, mem->phys_page);
    }
}

void
vm_page_free(
    vm_page_t	mem)
{
    vm_page_free_prepare(mem);
    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
void
vm_page_free_unlocked(
    vm_page_t	mem,
    boolean_t	remove_from_hash)
{
    vm_page_lockspin_queues();
    vm_page_free_prepare_queues(mem);
    vm_page_unlock_queues();

    vm_page_free_prepare_object(mem, remove_from_hash);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.  We sort the incoming pages into n lists, one for
 *	each color.
 */
void
vm_page_free_list(
    vm_page_t	mem,
    boolean_t	prepare_object)
{
    vm_page_t	nxt;
    int		pg_count = 0;
    int		color;
    int		inuse_list_head = -1;

    queue_head_t	free_list[MAX_COLORS];
    int			inuse[MAX_COLORS];

    for (color = 0; color < (signed) vm_colors; color++) {
        queue_init(&free_list[color]);
    }

    while (mem) {
        assert(!mem->inactive);
        assert(!mem->active);
        assert(!mem->throttled);
        assert(!mem->free);
        assert(!mem->speculative);
        assert(mem->pageq.prev == NULL);

        nxt = (vm_page_t)(mem->pageq.next);

        if (prepare_object == TRUE)
            vm_page_free_prepare_object(mem, TRUE);

        if (vm_page_free_verify && !mem->fictitious && !mem->private) {
            assert(pmap_verify_free(mem->phys_page));
        }
        if (!mem->fictitious) {
            if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
                mem->pageq.next = NULL;
                vm_page_release(mem);
            } else {
                /*
                 * IMPORTANT: we can't set the page "free" here
                 * because that would make the page eligible for
                 * a physically-contiguous allocation (see
                 * vm_page_find_contiguous()) right away (we don't
                 * hold the vm_page_queue_free lock).  That would
                 * cause trouble because the page is not actually
                 * in the free queue yet...
                 */
                color = mem->phys_page & vm_color_mask;
                if (queue_empty(&free_list[color])) {
                    inuse[color] = inuse_list_head;
                    inuse_list_head = color;
                }
                queue_enter_first(&free_list[color],
                                  mem,
                                  vm_page_t,
                                  pageq);
                pg_count++;
            }
        } else {
            assert(mem->phys_page == vm_page_fictitious_addr ||
                   mem->phys_page == vm_page_guard_addr);
            vm_page_release_fictitious(mem);
        }
        mem = nxt;
    }
    if (pg_count) {
        unsigned int	avail_free_count;
        unsigned int	need_wakeup = 0;
        unsigned int	need_priv_wakeup = 0;

        lck_mtx_lock_spin(&vm_page_queue_free_lock);

        color = inuse_list_head;

        while( color != -1 ) {
            vm_page_t first, last;
            vm_page_t first_free;

            /*
             * Now that we hold the vm_page_queue_free lock,
             * it's safe to mark all pages in our local queue
             * as "free"...
             */
            queue_iterate(&free_list[color],
                          mem,
                          vm_page_t,
                          pageq) {
                mem->free = TRUE;
            }

            /*
             * ... and insert our local queue at the head of
             * the global free queue.
             */
            first = (vm_page_t) queue_first(&free_list[color]);
            last = (vm_page_t) queue_last(&free_list[color]);
            first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
            if (queue_empty(&vm_page_queue_free[color])) {
                queue_last(&vm_page_queue_free[color]) =
                    (queue_entry_t) last;
            } else {
                queue_prev(&first_free->pageq) =
                    (queue_entry_t) last;
            }
            queue_first(&vm_page_queue_free[color]) =
                (queue_entry_t) first;
            queue_prev(&first->pageq) =
                (queue_entry_t) &vm_page_queue_free[color];
            queue_next(&last->pageq) =
                (queue_entry_t) first_free;

            color = inuse[color];
        }
        vm_page_free_count += pg_count;
        avail_free_count = vm_page_free_count;

        if (vm_page_free_wanted_privileged > 0 &&
            avail_free_count > 0) {
            if (avail_free_count < vm_page_free_wanted_privileged) {
                need_priv_wakeup = avail_free_count;
                vm_page_free_wanted_privileged -=
                    avail_free_count;
                avail_free_count = 0;
            } else {
                need_priv_wakeup = vm_page_free_wanted_privileged;
                avail_free_count -=
                    vm_page_free_wanted_privileged;
                vm_page_free_wanted_privileged = 0;
            }
        }
        if (vm_page_free_wanted > 0 &&
            avail_free_count > vm_page_free_reserved) {
            unsigned int  available_pages;

            available_pages = (avail_free_count -
                               vm_page_free_reserved);

            if (available_pages >= vm_page_free_wanted) {
                need_wakeup = vm_page_free_wanted;
                vm_page_free_wanted = 0;
            } else {
                need_wakeup = available_pages;
                vm_page_free_wanted -= available_pages;
            }
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);

        if (need_priv_wakeup != 0) {
            /*
             * There shouldn't be that many VM-privileged threads,
             * so let's wake them all up, even if we don't quite
             * have enough pages to satisfy them all.
             */
            thread_wakeup((event_t)&vm_page_free_wanted_privileged);
        }
        if (need_wakeup != 0 && vm_page_free_wanted == 0) {
            /*
             * We don't expect to have any more waiters
             * after this, so let's wake them all up at
             * once.
             */
            thread_wakeup((event_t) &vm_page_free_count);
        } else for (; need_wakeup != 0; need_wakeup--) {
            /*
             * Wake up one waiter per page we just released.
             */
            thread_wakeup_one((event_t) &vm_page_free_count);
        }
        {
            int	percent_avail;

            /*
             * Decide if we need to poke the memorystatus notification thread.
             */
            percent_avail =
                (vm_page_active_count + vm_page_inactive_count +
                 vm_page_speculative_count + vm_page_free_count +
                 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
                atop_64(max_mem);
            if (percent_avail >= (kern_memorystatus_level + 5)) {
                kern_memorystatus_level = percent_avail;
                thread_wakeup((event_t)&kern_memorystatus_wakeup);
            }
        }
    }
}
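/*
 * Illustrative sketch (not part of the original source): the color bucketing
 * used by vm_page_free_list() above.  A page's "color" is just the low bits
 * of its physical page number, so pages that would collide in a physically
 * indexed cache land on different per-color free queues; the local
 * free_list[color] queues are then spliced into the global queues under a
 * single acquisition of vm_page_queue_free_lock.
 */
static unsigned int
example_page_color(vm_page_t m)
{
    return (m->phys_page & vm_color_mask);	/* index into vm_page_queue_free[] */
}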
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
    register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    } else {
        /*
         * In theory, the page should be in an object before it
         * gets wired, since we need to hold the object lock
         * to update some fields in the page structure.
         * However, some code (i386 pmap, for example) might want
         * to wire a page before it gets inserted into an object.
         * That's somewhat OK, as long as nobody else can get to
         * that page and update it at the same time.
         */
    }
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    if ( !VM_PAGE_WIRED(mem)) {
        VM_PAGE_QUEUES_REMOVE(mem);

        if (mem->object) {
            mem->object->wired_page_count++;
            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);
            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                assert(vm_page_purgeable_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_count);
                OSAddAtomic(1, &vm_page_purgeable_wired_count);
            }
            if (mem->object->all_reusable) {
                /*
                 * Wired pages are not counted as "re-usable"
                 * in "all_reusable" VM objects, so nothing
                 * to do here.
                 */
            } else if (mem->reusable) {
                /*
                 * This page is not "re-usable" when it's
                 * wired, so adjust its state and the
                 * accounting.
                 */
                vm_object_reuse_pages(mem->object,
                                      mem->offset,
                                      mem->offset+PAGE_SIZE_64,
                                      FALSE);
            }
        }
        assert(!mem->reusable);

        if (!mem->private && !mem->fictitious && !mem->gobbled)
            vm_page_wire_count++;
        if (mem->gobbled)
            vm_page_gobble_count--;
        mem->gobbled = FALSE;
        if (mem->zero_fill == TRUE) {
            mem->zero_fill = FALSE;
            VM_ZF_COUNT_DECR();
        }
        {
            int	percent_avail;

            /*
             * Decide if we need to poke the memorystatus notification thread.
             */
            percent_avail =
                (vm_page_active_count + vm_page_inactive_count +
                 vm_page_speculative_count + vm_page_free_count +
                 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
                atop_64(max_mem);
            if (percent_avail <= (kern_memorystatus_level - 5)) {
                kern_memorystatus_level = percent_avail;
                thread_wakeup((event_t)&kern_memorystatus_wakeup);
            }
        }
        /*
         * The page could be encrypted, but
         * We don't have to decrypt it here
         * because we don't guarantee that the
         * data is actually valid at this point.
         * The page will get decrypted in
         * vm_fault_wire() if needed.
         */
    }
    assert(!mem->gobbled);
    mem->wire_count++;
    VM_PAGE_CHECK(mem);
}
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
    register vm_page_t	mem)
{
    vm_page_lockspin_queues();
    VM_PAGE_CHECK(mem);

    assert(!mem->gobbled);
    assert( !VM_PAGE_WIRED(mem));

    if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count++;
    }
    vm_page_gobble_count++;
    mem->gobbled = TRUE;
    vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
    register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    assert(VM_PAGE_WIRED(mem));
    assert(mem->object != VM_OBJECT_NULL);

    vm_object_lock_assert_exclusive(mem->object);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    if (--mem->wire_count == 0) {
        assert(!mem->private && !mem->fictitious);
        vm_page_wire_count--;
        assert(mem->object->wired_page_count > 0);
        mem->object->wired_page_count--;
        assert(mem->object->resident_page_count >=
               mem->object->wired_page_count);
        if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
            OSAddAtomic(+1, &vm_page_purgeable_count);
            assert(vm_page_purgeable_wired_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_wired_count);
        }
        assert(!mem->laundry);
        assert(mem->object != kernel_object);
        assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
        if (mem->object->purgable == VM_PURGABLE_EMPTY) {
            vm_page_deactivate(mem);
        } else {
            vm_page_activate(mem);
        }
        {
            int	percent_avail;

            /*
             * Decide if we need to poke the memorystatus notification thread.
             */
            percent_avail =
                (vm_page_active_count + vm_page_inactive_count +
                 vm_page_speculative_count + vm_page_free_count +
                 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
                atop_64(max_mem);
            if (percent_avail >= (kern_memorystatus_level + 5)) {
                kern_memorystatus_level = percent_avail;
                thread_wakeup((event_t)&kern_memorystatus_wakeup);
            }
        }
    }
    VM_PAGE_CHECK(mem);
}
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
    vm_page_t	m)
{
    vm_page_deactivate_internal(m, TRUE);
}

void
vm_page_deactivate_internal(
    vm_page_t	m,
    boolean_t	clear_hw_reference)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */

    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    /*
     *	This page is no longer very interesting.  If it was
     *	interesting (active or inactive/referenced), then we
     *	clear the reference bit and (re)enter it in the
     *	inactive queue.  Note wired pages should not have
     *	their reference bit cleared.
     */
    if (m->gobbled) {		/* can this happen? */
        assert( !VM_PAGE_WIRED(m));

        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    if (m->private || (VM_PAGE_WIRED(m)))
        return;

    if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
        pmap_clear_reference(m->phys_page);

    m->reference = FALSE;
    m->no_cache = FALSE;

    VM_PAGE_QUEUES_REMOVE(m);

    assert(!m->laundry);
    assert(m->pageq.next == NULL && m->pageq.prev == NULL);

    if (!IP_VALID(memory_manager_default) &&
        m->dirty && m->object->internal &&
        (m->object->purgable == VM_PURGABLE_DENY ||
         m->object->purgable == VM_PURGABLE_NONVOLATILE ||
         m->object->purgable == VM_PURGABLE_VOLATILE)) {
        queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
        m->throttled = TRUE;
        vm_page_throttled_count++;
    } else {
        if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
            vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
            vm_page_speculative_recreated++;
#endif
            return;
        }
        if (m->zero_fill) {
            queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
            vm_zf_queue_count++;
        } else {
            queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
        }
        m->inactive = TRUE;
        if (!m->fictitious) {
            vm_page_inactive_count++;
            token_new_pagecount++;
        }
    }
}
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(
    register vm_page_t	m)
{
    VM_PAGE_CHECK(m);
#ifdef	FIXME_4778297
    assert(m->object != kernel_object);
#endif
    assert(m->phys_page != vm_page_guard_addr);

    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    if (m->gobbled) {
        assert( !VM_PAGE_WIRED(m));
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    if (m->private)
        return;

    if (m->active)
        panic("vm_page_activate: already active");

    if (m->speculative) {
        DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
        DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
    }

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {
        assert(!m->laundry);
        assert(m->pageq.next == NULL && m->pageq.prev == NULL);
        if (!IP_VALID(memory_manager_default) &&
            !m->fictitious && m->dirty && m->object->internal &&
            (m->object->purgable == VM_PURGABLE_DENY ||
             m->object->purgable == VM_PURGABLE_NONVOLATILE ||
             m->object->purgable == VM_PURGABLE_VOLATILE)) {
            queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
            m->throttled = TRUE;
            vm_page_throttled_count++;
        } else {
            queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
            m->active = TRUE;
            if (!m->fictitious)
                vm_page_active_count++;
        }
        m->reference = TRUE;
        m->no_cache = FALSE;
    }
    VM_PAGE_CHECK(m);
}
/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
    vm_page_t	m,
    boolean_t	new)
{
    struct vm_speculative_age_q	*aq;

    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {
        mach_timespec_t	ts;
        clock_sec_t	sec;
        clock_nsec_t	nsec;

        clock_get_system_nanotime(&sec, &nsec);
        ts.tv_sec = (unsigned int) sec;
        ts.tv_nsec = nsec;

        if (vm_page_speculative_count == 0) {

            speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
            speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

            aq = &vm_page_queue_speculative[speculative_age_index];

            /*
             * set the timer to begin a new group
             */
            aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
            aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;

            ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
        } else {
            aq = &vm_page_queue_speculative[speculative_age_index];

            if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

                speculative_age_index++;

                if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                    speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                if (speculative_age_index == speculative_steal_index) {
                    speculative_steal_index = speculative_age_index + 1;

                    if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                }
                aq = &vm_page_queue_speculative[speculative_age_index];

                if (!queue_empty(&aq->age_q))
                    vm_page_speculate_ageit(aq);

                aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
                aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;

                ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
            }
        }
        enqueue_tail(&aq->age_q, &m->pageq);
        m->speculative = TRUE;
        vm_page_speculative_count++;

        if (new == TRUE) {
            m->object->pages_created++;
#if DEVELOPMENT || DEBUG
            vm_page_speculative_created++;
#endif
        }
    }
    VM_PAGE_CHECK(m);
}
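/*
 * Illustrative sketch (not part of the original source): how the aging
 * deadline above is formed.  With a hypothetical age of 500 ms, the deadline
 * becomes now + {0 sec, 500 * 1000 * NSEC_PER_USEC nsec}, i.e. half a second
 * after the age group was opened.
 */
static void
example_speculative_deadline(mach_timespec_t *now, mach_timespec_t *deadline, unsigned int age_ms)
{
    deadline->tv_sec  = age_ms / 1000;
    deadline->tv_nsec = (age_ms % 1000) * 1000 * NSEC_PER_USEC;
    ADD_MACH_TIMESPEC(deadline, now);		/* deadline += now */
}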
/*
 *	move pages from the specified aging bin to
 *	the speculative bin that pageout_scan claims from
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
    struct vm_speculative_age_q	*sq;
    vm_page_t	t;

    sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

    if (queue_empty(&sq->age_q)) {
        sq->age_q.next = aq->age_q.next;
        sq->age_q.prev = aq->age_q.prev;

        t = (vm_page_t)sq->age_q.next;
        t->pageq.prev = &sq->age_q;

        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = &sq->age_q;
    } else {
        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = aq->age_q.next;

        t = (vm_page_t)aq->age_q.next;
        t->pageq.prev = sq->age_q.prev;

        t = (vm_page_t)aq->age_q.prev;
        t->pageq.next = &sq->age_q;

        sq->age_q.prev = aq->age_q.prev;
    }
    queue_init(&aq->age_q);
}
void
vm_page_lru(
    vm_page_t	m)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

    if (m->active || m->reference)
        return;

    if (m->private || (VM_PAGE_WIRED(m)))
        return;

    m->no_cache = FALSE;

    VM_PAGE_QUEUES_REMOVE(m);

    assert(!m->laundry);
    assert(m->pageq.next == NULL && m->pageq.prev == NULL);

    queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
    m->inactive = TRUE;

    vm_page_inactive_count++;
    token_new_pagecount++;
}
void
vm_page_reactivate_all_throttled(void)
{
    vm_page_t	first_throttled, last_throttled;
    vm_page_t	first_active;
    vm_page_t	m;
    int		extra_active_count;

    extra_active_count = 0;
    vm_page_lock_queues();
    if (! queue_empty(&vm_page_queue_throttled)) {
        /*
         * Switch "throttled" pages to "active".
         */
        queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->throttled);
            assert(!m->active);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));
            if (!m->fictitious) {
                extra_active_count++;
            }
            m->throttled = FALSE;
            m->active = TRUE;
        }

        /*
         * Transfer the entire throttled queue to a regular LRU page queues.
         * We insert it at the head of the active queue, so that these pages
         * get re-evaluated by the LRU algorithm first, since they've been
         * completely out of it until now.
         */
        first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
        last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);
        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
        queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

        printf("reactivated %d throttled pages\n", vm_page_throttled_count);

        queue_init(&vm_page_queue_throttled);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += extra_active_count;
        vm_page_throttled_count = 0;
    }
    assert(vm_page_throttled_count == 0);
    assert(queue_empty(&vm_page_queue_throttled));
    vm_page_unlock_queues();
}
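/*
 * Illustrative sketch (not part of the original source): the head-splice
 * performed above, written against a generic queue_head_t pair.  All of
 * "donor"'s entries end up in front of "target"'s existing entries, and
 * donor is reinitialized to empty.
 */
static void
example_queue_splice_at_head(queue_head_t *target, queue_head_t *donor)
{
    queue_entry_t first = queue_first(donor);
    queue_entry_t last  = queue_last(donor);
    queue_entry_t old_first;

    if (queue_empty(donor))
        return;

    old_first = queue_first(target);
    if (queue_empty(target))
        queue_last(target) = last;		/* donor's tail becomes the new tail */
    else
        queue_prev(old_first) = last;		/* old head now follows donor's tail */

    queue_first(target) = first;
    queue_prev(first) = (queue_entry_t) target;
    queue_next(last) = old_first;

    queue_init(donor);
}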
/*
 *	move pages from the indicated local queue to the global active queue
 *	its ok to fail if we're below the hard limit and force == FALSE
 *	the nolocks == TRUE case is to allow this function to be run on
 *	the hibernate path
 */
void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
    struct vpl	*lq;
    vm_page_t	first_local, last_local;
    vm_page_t	first_active;
    vm_page_t	m;
    uint32_t	count = 0;

    if (vm_page_local_q == NULL)
        return;

    lq = &vm_page_local_q[lid].vpl_un.vpl;

    if (nolocks == FALSE) {
        if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
            if ( !vm_page_trylockspin_queues())
                return;
        } else
            vm_page_lockspin_queues();

        VPL_LOCK(&lq->vpl_lock);
    }
    if (lq->vpl_count) {
        /*
         * Switch "local" pages to "active".
         */
        assert(!queue_empty(&lq->vpl_queue));

        queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->local);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));
            assert(!m->throttled);
            assert(!m->fictitious);

            if (m->local_id != lid)
                panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

            m->local_id = 0;
            m->local = FALSE;
            m->active = TRUE;
            count++;
        }
        if (count != lq->vpl_count)
            panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

        /*
         * Transfer the entire local queue to a regular LRU page queues.
         */
        first_local = (vm_page_t) queue_first(&lq->vpl_queue);
        last_local = (vm_page_t) queue_last(&lq->vpl_queue);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);

        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
        queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_local->pageq) = (queue_entry_t) first_active;

        queue_init(&lq->vpl_queue);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += lq->vpl_count;
        lq->vpl_count = 0;
    }
    assert(queue_empty(&lq->vpl_queue));

    if (nolocks == FALSE) {
        VPL_UNLOCK(&lq->vpl_lock);
        vm_page_unlock_queues();
    }
}
/*
 *	vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
void
vm_page_part_zero_fill(
    vm_page_t	m,
    vm_offset_t	m_pa,
    vm_size_t	len)
{
    vm_page_t	tmp;

    VM_PAGE_CHECK(m);
#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
    pmap_zero_part_page(m->phys_page, m_pa, len);
#else
    while (1) {
        tmp = vm_page_grab();
        if (tmp == VM_PAGE_NULL) {
            vm_page_wait(THREAD_UNINT);
            continue;
        }
        break;
    }
    vm_page_zero_fill(tmp);
    if(m_pa != 0) {
        vm_page_part_copy(m, 0, tmp, 0, m_pa);
    }
    if((m_pa + len) < PAGE_SIZE) {
        vm_page_part_copy(m, m_pa + len, tmp,
                          m_pa + len, PAGE_SIZE - (m_pa + len));
    }
    vm_page_copy(tmp,m);
    VM_PAGE_FREE(tmp);
#endif
}
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
    vm_page_t	m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
        m->object, m->offset, m, 0,0);

    VM_PAGE_CHECK(m);

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
    pmap_zero_page(m->phys_page);
}
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
void
vm_page_part_copy(
    vm_page_t	src_m,
    vm_offset_t	src_pa,
    vm_page_t	dst_m,
    vm_offset_t	dst_pa,
    vm_size_t	len)
{
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dst_m);

    pmap_copy_part_page(src_m->phys_page, src_pa,
                        dst_m->phys_page, dst_pa, len);
}
/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 *
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
    vm_page_t	src_m,
    vm_page_t	dest_m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
        src_m->object, src_m->offset,
        dest_m->object, dest_m->offset,
        0);

    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dest_m);

    /*
     * The source page should not be encrypted at this point.
     * The destination page will therefore not contain encrypted
     * data after the copy.
     */
    if (src_m->encrypted) {
        panic("vm_page_copy: source page %p is encrypted\n", src_m);
    }
    dest_m->encrypted = FALSE;

    if (src_m->object != VM_OBJECT_NULL &&
        src_m->object->code_signed) {
        /*
         * We're copying a page from a code-signed object.
         * Whoever ends up mapping the copy page might care about
         * the original page's integrity, so let's validate the
         * source page now.
         */
        vm_page_copy_cs_validations++;
        vm_page_validate_cs(src_m);
    }
    /*
     * Propagate the cs_tainted bit to the copy page. Do not propagate
     * the cs_validated bit.
     */
    dest_m->cs_tainted = src_m->cs_tainted;
    if (dest_m->cs_tainted) {
        vm_page_copy_cs_tainted++;
    }

    pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
3343 printf("vm_page %p: \n", p
);
3344 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3345 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3346 printf(" next=%p\n", p
->next
);
3347 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3348 printf(" wire_count=%u\n", p
->wire_count
);
3350 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3351 (p
->local
? "" : "!"),
3352 (p
->inactive
? "" : "!"),
3353 (p
->active
? "" : "!"),
3354 (p
->pageout_queue
? "" : "!"),
3355 (p
->speculative
? "" : "!"),
3356 (p
->laundry
? "" : "!"));
3357 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3358 (p
->free
? "" : "!"),
3359 (p
->reference
? "" : "!"),
3360 (p
->gobbled
? "" : "!"),
3361 (p
->private ? "" : "!"),
3362 (p
->throttled
? "" : "!"));
3363 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3364 (p
->busy
? "" : "!"),
3365 (p
->wanted
? "" : "!"),
3366 (p
->tabled
? "" : "!"),
3367 (p
->fictitious
? "" : "!"),
3368 (p
->pmapped
? "" : "!"),
3369 (p
->wpmapped
? "" : "!"));
3370 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3371 (p
->pageout
? "" : "!"),
3372 (p
->absent
? "" : "!"),
3373 (p
->error
? "" : "!"),
3374 (p
->dirty
? "" : "!"),
3375 (p
->cleaning
? "" : "!"),
3376 (p
->precious
? "" : "!"),
3377 (p
->clustered
? "" : "!"));
3378 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3379 (p
->overwriting
? "" : "!"),
3380 (p
->restart
? "" : "!"),
3381 (p
->unusual
? "" : "!"),
3382 (p
->encrypted
? "" : "!"),
3383 (p
->encrypted_cleaning
? "" : "!"));
3384 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3385 (p
->list_req_pending
? "" : "!"),
3386 (p
->dump_cleaning
? "" : "!"),
3387 (p
->cs_validated
? "" : "!"),
3388 (p
->cs_tainted
? "" : "!"),
3389 (p
->no_cache
? "" : "!"));
3390 printf(" %szero_fill\n",
3391 (p
->zero_fill
? "" : "!"));
3393 printf("phys_page=0x%x\n", p
->phys_page
);
#if	MACH_ASSERT
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static boolean_t
vm_page_verify_contiguous(
    vm_page_t	pages,
    unsigned int	npages)
{
    register vm_page_t	m;
    unsigned int		page_count;
    vm_offset_t		prev_addr;

    prev_addr = pages->phys_page;
    page_count = 1;
    for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
        if (m->phys_page != prev_addr + 1) {
            printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
                   m, (long)prev_addr, m->phys_page);
            printf("pages %p page_count %d\n", pages, page_count);
            panic("vm_page_verify_contiguous: not contiguous!");
        }
        prev_addr = m->phys_page;
        ++page_count;
    }
    if (page_count != npages) {
        printf("pages %p actual count 0x%x but requested 0x%x\n",
               pages, page_count, npages);
        panic("vm_page_verify_contiguous: count error");
    }
    return 1;
}


/*
 *	Check the free lists for proper length etc.
 */
static unsigned int
vm_page_verify_free_list(
    unsigned int	color,
    vm_page_t	look_for_page,
    boolean_t	expect_page)
{
    unsigned int	npages;
    vm_page_t	m;
    vm_page_t	prev_m;
    boolean_t	found_page;

    found_page = FALSE;
    npages = 0;
    prev_m = (vm_page_t) &vm_page_queue_free[color];
    queue_iterate(&vm_page_queue_free[color],
                  m,
                  vm_page_t,
                  pageq) {
        if (m == look_for_page) {
            found_page = TRUE;
        }
        if ((vm_page_t) m->pageq.prev != prev_m)
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
                  color, npages, m, m->pageq.prev, prev_m);
        if ( ! m->free )
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
                  color, npages, m);
        if ( ! m->busy )
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
                  color, npages, m);
        if ( (m->phys_page & vm_color_mask) != color)
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
                  color, npages, m, m->phys_page & vm_color_mask, color);
        ++npages;
        prev_m = m;
    }
    if (look_for_page != VM_PAGE_NULL) {
        unsigned int other_color;

        if (expect_page && !found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
            _vm_page_print(look_for_page);
            for (other_color = 0;
                 other_color < vm_colors;
                 other_color++) {
                if (other_color == color)
                    continue;
                vm_page_verify_free_list(other_color, look_for_page, FALSE);
            }
            panic("vm_page_verify_free_list(color=%u)\n", color);
        }
        if (!expect_page && found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
        }
    }
    return npages;
}

static boolean_t vm_page_verify_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
    unsigned int	color, npages;

    if (! vm_page_verify_free_lists_enabled)
        return;

    npages = 0;

    lck_mtx_lock(&vm_page_queue_free_lock);

    for( color = 0; color < vm_colors; color++ ) {
        npages += vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
    }
    if (npages != vm_page_free_count)
        panic("vm_page_verify_free_lists: npages %u free_count %d",
              npages, vm_page_free_count);

    lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
    vm_page_t	mem,
    int		val)
{
    if (mem->free + mem->active + mem->inactive + mem->speculative +
        mem->throttled + mem->pageout_queue > (val)) {
        _vm_page_print(mem);
        panic("vm_page_queues_assert(%p, %d)\n", mem, val);
    }
    if (VM_PAGE_WIRED(mem)) {
        assert(!mem->active);
        assert(!mem->inactive);
        assert(!mem->speculative);
        assert(!mem->throttled);
    }
}
#endif	/* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
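/*
 * Illustrative sketch (not part of the original source): the core of the run
 * detection described above, reduced to its arithmetic.  A run keeps growing
 * only while each page's physical number is exactly one past the previous
 * page's; anything else restarts the run at the current page.
 */
static unsigned int
example_scan_for_run(ppnum_t *pnums, unsigned int count, unsigned int want)
{
    ppnum_t		prevcontaddr = (ppnum_t) -2;	/* never adjacent to page 0 */
    unsigned int	npages = 0;
    unsigned int	i;

    for (i = 0; i < count && npages < want; i++) {
        if (pnums[i] == prevcontaddr + 1)
            npages++;		/* extends the current run */
        else
            npages = 1;		/* this page starts a new run */
        prevcontaddr = pnums[i];
    }
    return npages;		/* == want iff a long-enough run was found */
}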
static vm_page_t
vm_page_find_contiguous(
    unsigned int	contig_pages,
    ppnum_t		max_pnum,
    ppnum_t		pnum_mask,
    boolean_t		wire,
    int			flags)
{
    vm_page_t		m = VM_PAGE_NULL;
    vm_page_t		m1, m2;
    ppnum_t		prevcontaddr;
    ppnum_t		start_pnum = 0;
    unsigned int	npages, considered, scanned;
    unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
    unsigned int	idx_last_contig_page_found = 0;
    int			free_considered, free_available;
    int			substitute_needed;
    boolean_t		wrapped;
    clock_sec_t		tv_start_sec, tv_end_sec;
    clock_usec_t	tv_start_usec, tv_end_usec;
    int			yielded = 0;
    int			dumped_run = 0;
    int			stolen_pages = 0;

    if (contig_pages == 0)
        return VM_PAGE_NULL;

#if MACH_ASSERT
    vm_page_verify_free_lists();
#endif
    clock_get_system_microtime(&tv_start_sec, &tv_start_usec);

    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);

    RESET_STATE_OF_RUN();

    scanned = 0;
    considered = 0;

    free_available = vm_page_free_count - vm_page_free_reserved;

    wrapped = FALSE;

    if(flags & KMA_LOMEM)
        idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
    else
        idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

    orig_last_idx = idx_last_contig_page_found;
    last_idx = orig_last_idx;

    for (page_idx = last_idx, start_idx = last_idx;
         npages < contig_pages && page_idx < vm_pages_count;
         page_idx++) {
retry:
        if (wrapped &&
            npages == 0 &&
            page_idx >= orig_last_idx) {
            /*
             * We're back where we started and we haven't
             * found any suitable contiguous range.  Let's
             * give up.
             */
            goto done_scanning;
        }
        scanned++;
        m = &vm_pages[page_idx];

        assert(!m->fictitious);
        assert(!m->private);

        if (max_pnum && m->phys_page > max_pnum) {
            /* no more low pages... */
            break;
        }
        if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend &&
             m->phys_page >= vm_lopage_poolstart) {
            /*
             * don't want to take pages from our
             * reserved pool of low memory
             * so don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!npages & ((m->phys_page & pnum_mask) != 0)) {

            RESET_STATE_OF_RUN();

        } else if (VM_PAGE_WIRED(m) || m->gobbled ||
                   m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
                   m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
                   m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
                   m->pageout) {
            /*
             * page is in a transient state
             * or a state we don't want to deal
             * with, so don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
            /*
             * page needs to be on one of our queues
             * in order for it to be stable behind the
             * locks we hold at this point...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && (!m->tabled || m->busy)) {
            /*
             * pages on the free list are always 'busy'
             * so we couldn't test for 'busy' in the check
             * for the transient states... pages that are
             * 'free' are never 'tabled', so we also couldn't
             * test for 'tabled'.  So we check here to make
             * sure that a non-free page is not busy and is
             * tabled on an object...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else {
            if (m->phys_page != prevcontaddr + 1) {
                if ((m->phys_page & pnum_mask) != 0) {
                    RESET_STATE_OF_RUN();
                    goto did_consider;
                } else {
                    npages = 1;
                    start_idx = page_idx;
                    start_pnum = m->phys_page;
                }
            } else {
                npages++;
            }
            prevcontaddr = m->phys_page;

            if (m->free) {
                free_considered++;
            } else {
                /*
                 * This page is not free.
                 * If we can't steal used pages,
                 * we have to give up this run
                 * and keep looking.
                 * Otherwise, we might need to
                 * move the contents of this page
                 * into a substitute page.
                 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
                if (m->pmapped || m->dirty) {
                    substitute_needed++;
                }
#else
                RESET_STATE_OF_RUN();
#endif
            }

            if ((free_considered + substitute_needed) > free_available) {
                /*
                 * if we let this run continue
                 * we will end up dropping the vm_page_free_count
                 * below the reserve limit... we need to abort
                 * this run, but we can at least re-consider this
                 * page... thus the jump back to 'retry'
                 */
                RESET_STATE_OF_RUN();

                if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
                    considered++;
                    goto retry;
                }
                /*
                 * free_available == 0
                 * so can't consider any free pages... if
                 * we went to retry in this case, we'd
                 * get stuck looking at the same page
                 * w/o making any forward progress
                 * we also want to take this path if we've already
                 * reached our limit that controls the lock latency
                 */
            }
        }
did_consider:
        if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

            lck_mtx_unlock(&vm_page_queue_free_lock);
            vm_page_unlock_queues();

            mutex_pause(0);

            vm_page_lock_queues();
            lck_mtx_lock(&vm_page_queue_free_lock);

            RESET_STATE_OF_RUN();
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;
            considered = 0;

            yielded++;

            goto retry;
        }
        considered++;
    }
    m = VM_PAGE_NULL;

    if (npages != contig_pages) {
        if (!wrapped) {
            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();
            if( flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
            last_idx = 0;
            page_idx = last_idx;
            wrapped = TRUE;
            goto retry;
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
        unsigned int	cur_idx;
        unsigned int	tmp_start_idx;
        vm_object_t	locked_object = VM_OBJECT_NULL;
        boolean_t	abort_run = FALSE;

        assert(page_idx - start_idx == contig_pages);

        tmp_start_idx = start_idx;

        /*
         * first pass through to pull the free pages
         * off of the free queue so that in case we
         * need substitute pages, we won't grab any
         * of the free pages in the run... we'll clear
         * the 'free' bit in the 2nd pass, and even in
         * an abort_run case, we'll collect all of the
         * free pages in this run and return them to the free list
         */
        while (start_idx < page_idx) {

            m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
            assert(m1->free);
#endif
            if (m1->free) {
                unsigned int color;

                color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
                vm_page_verify_free_list(color, m1, TRUE);
#endif
                queue_remove(&vm_page_queue_free[color],
                             m1,
                             vm_page_t,
                             pageq);
                m1->pageq.next = NULL;
                m1->pageq.prev = NULL;
#if MACH_ASSERT
                vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
#endif
                /*
                 * Clear the "free" bit so that this page
                 * does not get considered for another
                 * concurrent physically-contiguous allocation.
                 */
                m1->free = FALSE;
                assert(m1->busy);

                vm_page_free_count--;
            }
        }
        /*
         * adjust global freelist counts
         */
        if (vm_page_free_count < vm_page_free_count_minimum)
            vm_page_free_count_minimum = vm_page_free_count;

        if( flags & KMA_LOMEM)
            vm_page_lomem_find_contiguous_last_idx = page_idx;
        else
            vm_page_find_contiguous_last_idx = page_idx;

        /*
         * we can drop the free queue lock at this point since
         * we've pulled any 'free' candidates off of the list
         * we need it dropped so that we can do a vm_page_grab
         * when substituting for pmapped/dirty pages
         */
        lck_mtx_unlock(&vm_page_queue_free_lock);

        start_idx = tmp_start_idx;
        cur_idx = page_idx - 1;

        while (start_idx++ < page_idx) {
            /*
             * must go through the list from back to front
             * so that the page list is created in the
             * correct order - low -> high phys addresses
             */
            m1 = &vm_pages[cur_idx--];

            if (m1->object == VM_OBJECT_NULL) {
                /*
                 * page has already been removed from
                 * the free list in the 1st pass
                 */
                assert(m1->offset == (vm_object_offset_t) -1);
                assert(m1->busy);
                assert(!m1->wanted);
                assert(!m1->laundry);
            } else {
                vm_object_t object;

                if (abort_run == TRUE)
                    continue;

                object = m1->object;

                if (object != locked_object) {
                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    if (vm_object_lock_try(object))
                        locked_object = object;
                }
                if (locked_object == VM_OBJECT_NULL ||
                    (VM_PAGE_WIRED(m1) || m1->gobbled ||
                     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
                     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
                     m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {

                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    tmp_start_idx = cur_idx;
                    abort_run = TRUE;
                    continue;
                }
                if (m1->pmapped || m1->dirty) {
                    int			refmod;
                    vm_object_offset_t	offset;

                    m2 = vm_page_grab();

                    if (m2 == VM_PAGE_NULL) {
                        if (locked_object) {
                            vm_object_unlock(locked_object);
                            locked_object = VM_OBJECT_NULL;
                        }
                        tmp_start_idx = cur_idx;
                        abort_run = TRUE;
                        continue;
                    }
                    if (m1->pmapped)
                        refmod = pmap_disconnect(m1->phys_page);
                    else
                        refmod = 0;
                    vm_page_copy(m1, m2);

                    m2->reference = m1->reference;
                    m2->dirty     = m1->dirty;

                    if (refmod & VM_MEM_REFERENCED)
                        m2->reference = TRUE;
                    if (refmod & VM_MEM_MODIFIED)
                        m2->dirty = TRUE;
                    offset = m1->offset;

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);

                    /*
                     * make sure we clear the ref/mod state
                     * from the pmap layer... else we risk
                     * inheriting state from the last time
                     * this page was used...
                     */
                    pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
                    /*
                     * now put the substitute page on the object
                     */
                    vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);

                    if (m2->reference)
                        vm_page_activate(m2);
                    else
                        vm_page_deactivate(m2);

                    PAGE_WAKEUP_DONE(m2);
                } else {
                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);
                }
                stolen_pages++;
            }
            m1->pageq.next = (queue_entry_t) m;
            m1->pageq.prev = NULL;
            m = m1;
        }
        if (locked_object) {
            vm_object_unlock(locked_object);
            locked_object = VM_OBJECT_NULL;
        }

        if (abort_run == TRUE) {
            if (m != VM_PAGE_NULL) {
                vm_page_free_list(m, FALSE);
            }
            dumped_run++;
            /*
             * want the index of the last
             * page in this run that was
             * successfully 'stolen', so back
             * it up 1 for the auto-decrement on use
             * and 1 more to bump back over this page
             */
            page_idx = tmp_start_idx + 2;
            if (page_idx >= vm_pages_count) {
                if (wrapped)
                    goto done_scanning;
                page_idx = last_idx = 0;
                wrapped = TRUE;
            }
            abort_run = FALSE;

            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();

            if( flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

            last_idx = page_idx;

            lck_mtx_lock(&vm_page_queue_free_lock);
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;
            goto retry;
        }

        for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {

            if (wire == TRUE)
                m1->wire_count++;
            else
                m1->gobbled = TRUE;
        }
        if (wire == FALSE)
            vm_page_gobble_count += npages;

        /*
         * gobbled pages are also counted as wired pages
         */
        vm_page_wire_count += npages;

        assert(vm_page_verify_contiguous(m, npages));
    }
done_scanning:
    vm_page_unlock_queues();

    clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

    tv_end_sec -= tv_start_sec;
    if (tv_end_usec < tv_start_usec) {
        tv_end_sec--;
        tv_end_usec += 1000000;
    }
    tv_end_usec -= tv_start_usec;
    if (tv_end_usec >= 1000000) {
        tv_end_sec++;
        tv_end_usec -= 1000000;
    }
    if (vm_page_find_contig_debug) {
        printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
               __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
               (long)tv_end_sec, tv_end_usec, orig_last_idx,
               scanned, yielded, dumped_run, stolen_pages);
    }

#if MACH_ASSERT
    vm_page_verify_free_lists();
#endif
    return m;
}
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
    vm_size_t	size,
    vm_page_t	*list,
    ppnum_t	max_pnum,
    ppnum_t	pnum_mask,
    boolean_t	wire,
    int		flags)
{
    vm_page_t		pages;
    unsigned int	npages;

    if (size % page_size != 0)
        return KERN_INVALID_ARGUMENT;

    npages = (unsigned int) (size / PAGE_SIZE);
    if (npages != size / PAGE_SIZE) {
        /* 32-bit overflow */
        return KERN_INVALID_ARGUMENT;
    }

    /*
     *	Obtain a pointer to a subset of the free
     *	list large enough to satisfy the request;
     *	the region will be physically contiguous.
     */
    pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

    if (pages == VM_PAGE_NULL)
        return KERN_NO_SPACE;
    /*
     * determine need for wakeups
     */
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    {
        int	percent_avail;

        /*
         * Decide if we need to poke the memorystatus notification thread.
         */
        percent_avail =
            (vm_page_active_count + vm_page_inactive_count +
             vm_page_speculative_count + vm_page_free_count +
             (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
            atop_64(max_mem);
        if (percent_avail <= (kern_memorystatus_level - 5)) {
            kern_memorystatus_level = percent_avail;
            thread_wakeup((event_t)&kern_memorystatus_wakeup);
        }
    }
    /*
     *	The CPM pages should now be available and
     *	ordered by ascending physical address.
     */
    assert(vm_page_verify_contiguous(pages, npages));

    *list = pages;
    return KERN_SUCCESS;
}
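/*
 * Illustrative sketch (not part of the original source): how a caller might
 * use cpm_allocate() as implemented above.  The max_pnum/pnum_mask arguments
 * shown here (0 == no physical ceiling, 0 == no alignment constraint) are
 * assumed example values, not requirements.
 */
static kern_return_t
example_grab_contiguous_chunk(vm_size_t size, vm_page_t *page_list_out)
{
    /* wire == TRUE: pages come back wired; flags == 0: no KMA_LOMEM etc. */
    return cpm_allocate(size, page_list_out, 0, 0, TRUE, 0);
}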
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

static vm_page_t hibernate_gobble_queue;

void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++)
    {
        uint32_t last_bit;

        bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
        // set out-of-bound bits at end of bitmap.
        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
        if (last_bit)
            bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
    uint32_t  i;
    vm_page_t m;
    uint64_t  start, end, timeout, nsec;
    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
    clock_get_uptime(&start);

    for (i = 0; i < gobble_count; i++)
    {
        while (VM_PAGE_NULL == (m = vm_page_grab()))
        {
            clock_get_uptime(&end);
            if (end >= timeout)
                break;
            VM_PAGE_WAIT();
        }
        if (!m)
            break;
        m->busy = FALSE;
        vm_page_gobble(m);

        m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
        hibernate_gobble_queue = m;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

void
hibernate_free_gobble_pages(void)
{
    vm_page_t m, next;
    uint32_t  count = 0;

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        next = (vm_page_t) m->pageq.next;
        vm_page_free(m);
        count++;
        m = next;
    }
    hibernate_gobble_queue = VM_PAGE_NULL;

    if (count)
        HIBLOG("Freed %d pages\n", count);
}
static boolean_t
hibernate_consider_discard(vm_page_t m)
{
    vm_object_t object = NULL;
    int         refmod_state;
    boolean_t   discard = FALSE;

    do
    {
        if (m->private)
            panic("hibernate_consider_discard: private");

        if (!vm_object_lock_try(m->object))
            break;

        object = m->object;

        if (VM_PAGE_WIRED(m))
            break;

        if (m->busy || !object->alive)
            /*
             * Somebody is playing with this page.
             */
            break;

        if (m->absent || m->unusual || m->error)
            /*
             * If it's unusual in anyway, ignore it
             */
            break;

        if (m->laundry || m->list_req_pending)
            break;

        if (!m->dirty)
        {
            refmod_state = pmap_get_refmod(m->phys_page);

            if (refmod_state & VM_MEM_REFERENCED)
                m->reference = TRUE;
            if (refmod_state & VM_MEM_MODIFIED)
                m->dirty = TRUE;
        }

        /*
         * If it's clean or purgeable we can discard the page on wakeup.
         */
        discard = (!m->dirty)
                    || (VM_PURGABLE_VOLATILE == object->purgable)
                    || (VM_PURGABLE_EMPTY == m->object->purgable);
    }
    while (FALSE);

    if (object)
        vm_object_unlock(object);

    return (discard);
}


static void
hibernate_discard_page(vm_page_t m)
{
    if (m->absent || m->unusual || m->error)
        /*
         * If it's unusual in anyway, ignore
         */
        return;

    if (m->pmapped == TRUE)
    {
        __unused int refmod_state = pmap_disconnect(m->phys_page);
    }

    if (m->laundry)
        panic("hibernate_discard_page(%p) laundry", m);
    if (m->private)
        panic("hibernate_discard_page(%p) private", m);
    if (m->fictitious)
        panic("hibernate_discard_page(%p) fictitious", m);

    if (VM_PURGABLE_VOLATILE == m->object->purgable)
    {
        /* object should be on a queue */
        assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
        purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
        assert(old_queue);
        /* No need to lock page queue for token delete, hibernate_vm_unlock()
           makes sure these locks are uncontended before sleep */
        vm_purgeable_token_delete_first(old_queue);
        m->object->purgable = VM_PURGABLE_EMPTY;
    }

    vm_page_free(m);
}
/*
   Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
   pages known to VM to not need saving are subtracted.
   Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
                           hibernate_page_list_t * page_list_wired,
                           uint32_t * pagesOut)
{
    uint64_t start, end, nsec;
    vm_page_t m;
    uint32_t pages = page_list->page_count;
    uint32_t count_zf = 0, count_throttled = 0;
    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
    uint32_t count_wire = pages;
    uint32_t count_discard_active    = 0;
    uint32_t count_discard_inactive  = 0;
    uint32_t count_discard_purgeable = 0;
    uint32_t count_discard_speculative = 0;
    uint32_t i;
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;
    hibernate_bitmap_t * bitmap_wired;

    HIBLOG("hibernate_page_list_setall start\n");

    clock_get_uptime(&start);

    hibernate_page_list_zero(page_list);
    hibernate_page_list_zero(page_list_wired);

    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++)
            vm_page_reactivate_local(i, TRUE, TRUE);
    }

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        pages--;
        count_wire--;
        hibernate_page_bitset(page_list,       TRUE, m->phys_page);
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        m = (vm_page_t) m->pageq.next;
    }

    for( i = 0; i < vm_colors; i++ )
    {
        queue_iterate(&vm_page_queue_free[i],
                      m,
                      vm_page_t,
                      pageq)
        {
            pages--;
            count_wire--;
            hibernate_page_bitset(page_list,       TRUE, m->phys_page);
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        }
    }

    queue_iterate(&vm_lopage_queue_free,
                  m,
                  vm_page_t,
                  pageq)
    {
        pages--;
        count_wire--;
        hibernate_page_bitset(page_list,       TRUE, m->phys_page);
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    queue_iterate( &vm_page_queue_throttled,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            count_discard_inactive++;
        }
        else
            count_throttled++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    queue_iterate( &vm_page_queue_zf,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
        }
        else
            count_zf++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    queue_iterate( &vm_page_queue_inactive,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
        }
        else
            count_inactive++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        queue_iterate(&vm_page_queue_speculative[i].age_q,
                      m,
                      vm_page_t,
                      pageq)
        {
            if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
             && hibernate_consider_discard(m))
            {
                hibernate_page_bitset(page_list, TRUE, m->phys_page);
                count_discard_speculative++;
            }
            else
                count_speculative++;
            count_wire--;
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        }
    }

    queue_iterate( &vm_page_queue_active,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
        }
        else
            count_active++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    // pull wired from hibernate_bitmap

    bitmap = &page_list->bank_bitmap[0];
    bitmap_wired = &page_list_wired->bank_bitmap[0];
    for (bank = 0; bank < page_list->bank_count; bank++)
    {
        for (i = 0; i < bitmap->bitmapwords; i++)
            bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
        bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
        bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
    }

    // machine dependent adjustments
    hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

    HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
           pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);

    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
}
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t  i;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_speculative = 0;

    clock_get_uptime(&start);

    m = (vm_page_t) queue_first(&vm_page_queue_zf);
    while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
        while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
        {
            next = (vm_page_t) m->pageq.next;
            if (hibernate_page_bittst(page_list, m->phys_page))
            {
                count_discard_speculative++;
                hibernate_discard_page(m);
            }
            m = next;
        }
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
           nsec / 1000000ULL,
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
    hash_info_bucket_t	*info,
    unsigned int	count)
{
    unsigned int	i;
    lck_spin_t		*bucket_lock;

    if (vm_page_bucket_count < count)
        count = vm_page_bucket_count;

    for (i = 0; i < count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];
        unsigned int bucket_count = 0;
        vm_page_t m;

        bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
        lck_spin_lock(bucket_lock);

        for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
            bucket_count++;

        lck_spin_unlock(bucket_lock);

        /* don't touch pageable memory while holding locks */
        info[i].hib_count = bucket_count;
    }

    return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
4718 #include <mach_kdb.h>
4721 #include <ddb/db_output.h>
4722 #include <vm/vm_print.h>
4723 #define printf kdbprintf
4726 * Routine: vm_page_print [exported]
4734 p
= (vm_page_t
) (long) db_addr
;
4736 iprintf("page 0x%x\n", p
);
4740 iprintf("object=0x%x", p
->object
);
4741 printf(", offset=0x%x", p
->offset
);
4742 printf(", wire_count=%d", p
->wire_count
);
4744 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
4745 (p
->local
? "" : "!"),
4746 (p
->inactive
? "" : "!"),
4747 (p
->active
? "" : "!"),
4748 (p
->throttled
? "" : "!"),
4749 (p
->gobbled
? "" : "!"),
4750 (p
->laundry
? "" : "!"),
4751 (p
->free
? "" : "!"),
4752 (p
->reference
? "" : "!"),
4753 (p
->encrypted
? "" : "!"));
4754 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
4755 (p
->busy
? "" : "!"),
4756 (p
->wanted
? "" : "!"),
4757 (p
->tabled
? "" : "!"),
4758 (p
->fictitious
? "" : "!"),
4759 (p
->private ? "" : "!"),
4760 (p
->precious
? "" : "!"));
4761 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
4762 (p
->absent
? "" : "!"),
4763 (p
->error
? "" : "!"),
4764 (p
->dirty
? "" : "!"),
4765 (p
->cleaning
? "" : "!"),
4766 (p
->pageout
? "" : "!"),
4767 (p
->clustered
? "" : "!"));
4768 iprintf("%soverwriting, %srestart, %sunusual\n",
4769 (p
->overwriting
? "" : "!"),
4770 (p
->restart
? "" : "!"),
4771 (p
->unusual
? "" : "!"));
4773 iprintf("phys_page=0x%x", p
->phys_page
);
4777 #endif /* MACH_KDB */