/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>

#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>

#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kern_memorystatus.h>

#include <sys/kdebug.h>
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */
vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical table.]
 */
typedef struct {
	vm_page_t	pages;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;
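/*
 * Illustrative note (not part of the original source): each bucket holds a
 * singly-linked list of pages chained through vm_page_t->next, and
 * BUCKETS_PER_LOCK (16) consecutive buckets share one entry of
 * vm_page_bucket_locks, so the lock protecting bucket hash_id is
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK], as the insert, remove
 * and lookup routines below compute.
 */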
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	functions.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t		vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
unsigned int	io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_zf;	/* inactive memory queue for zero fill */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;	   /* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;	   /* total count of purged pages */
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;
/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */
boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_mask = page_size - 1;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
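/*
 * Worked example (illustrative, not part of the original source): with
 * PAGE_SIZE = 4096 this leaves page_mask = 0xFFF and the loop exits with
 * page_shift = 12, so trunc_page(x) is (x & ~page_mask) and round_page(x)
 * is ((x + page_mask) & ~page_mask).
 */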
/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2 */
	if ( ( n & (n - 1)) != 0 )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;
}
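/*
 * Illustrative note (not part of the original source): with vm_colors = 8,
 * vm_color_mask is 7 and a free page's color queue is chosen by
 * (phys_page & vm_color_mask) in vm_page_release() below, so consecutive
 * physical pages are spread across the per-color free queues.
 */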
lck_grp_t	vm_page_lck_grp_free;
lck_grp_t	vm_page_lck_grp_queue;
lck_grp_t	vm_page_lck_grp_local;
lck_grp_t	vm_page_lck_grp_purge;
lck_grp_t	vm_page_lck_grp_alloc;
lck_grp_t	vm_page_lck_grp_bucket;
lck_grp_attr_t	vm_page_lck_grp_attr;
lck_attr_t	vm_page_lck_attr;
__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
}
void
vm_page_init_local_q()
{
	unsigned int	num_cpus;
	unsigned int	i;
	struct vplq	*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */
void
vm_page_bootstrap(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */
	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next = VM_PAGE_NULL;

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->list_req_pending = FALSE;
	m->dump_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->zero_fill = FALSE;
	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;

		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
	}

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_zf);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */
	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;
	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *	hash shifts the object left by (B/2 - O + 1) bits
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to ensure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
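	/*
	 * Worked example (illustrative, not part of the original source): on a
	 * machine with ~2 million physical pages the loop above settles on
	 * vm_page_bucket_count = 2^21, giving vm_page_hash_mask = 0x1FFFFF and,
	 * with BUCKETS_PER_LOCK = 16, vm_page_bucket_lock_count = 2^17.  Then
	 * log1 = 21, so vm_page_bucket_hash = (1 << 11) | (1 << 5) | 1, the odd
	 * constant used as a multiplier by the vm_page_hash() macro below.
	 */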
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->pages = VM_PAGE_NULL;
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved.  At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;
	vm_page_free_count_minimum = vm_page_free_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size up to a multiple of the pointer size.
	 */
	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */
	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */
	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */
	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}
void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */
	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
		vm_page_release(&vm_pages[i - 1]);
	}
#if 0
	{
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;							/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
		queue_iterate(&vm_page_queue_free[i],
			      xx,
			      vm_page_t,
			      pageq) {				/* BRINGUP */
			j++;					/* (BRINGUP) */
			if(j > vm_page_free_count) {		/* (BRINGUP) */
				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
			}

			l = vm_page_free_count - j;		/* (BRINGUP) */
			k = 0;					/* (BRINGUP) */

			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

			for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
				k++;
				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
				}
			}

			xxl = xx;
		}
	}

	if(j != vm_page_free_count) {				/* (BRINGUP) */
		panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
	}
#endif
	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */
	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);

	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

	lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		m->fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
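/*
 * Illustrative sketch (not part of the original source) of how the macro is
 * used by the insert/remove/lookup paths below; masking with
 * vm_page_hash_mask is why the bucket count must be a power of 2, and
 * vm_page_bucket_hash is odd so the multiply scrambles the object pointer:
 *
 *	int hash_id = vm_page_hash(object, offset);
 *	vm_page_bucket_t *bucket = &vm_page_buckets[hash_id];
 *	lck_spin_t *bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */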
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the
 *	object/object-page table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);

	if (object == vm_submap_object) {
		/* the vm_submap_object is only a placeholder for submaps */
		panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
	}

	vm_object_lock_assert_exclusive(object);

	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
					: LCK_MTX_ASSERT_NOTOWNED);

	if (insert_in_hash == TRUE) {
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);

		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */
		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next = bucket->pages;
		bucket->pages = mem;
#if	MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}

	{
		unsigned int	cache_attr;

		cache_attr = object->wimg_bits & VM_WIMG_MASK;

		if (cache_attr != VM_WIMG_USE_DEFAULT) {
			pmap_set_cache_attributes(mem->phys_page, cache_attr);
			object->set_cache_attr = TRUE;
		}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	VM_PAGE_INSERT(mem, object);

	/*
	 *	Show that the object has one more resident page.
	 */
	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	assert(!mem->reusable);

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_exclusive(object);

	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 *	Record the object/offset pair in this page
	 */
	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->pages) {
		vm_page_t *mp = &bucket->pages;
		vm_page_t m = *mp;

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next;

				found_m = m;
				break;
			}
			mp = &m->next;
		} while ((m = *mp));

		mem->next = bucket->pages;
	} else {
		mem->next = VM_PAGE_NULL;
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->pages = mem;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */
void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);

	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = bucket->pages) == mem) {
			/* optimize for common case */

			bucket->pages = mem->next;
		} else {
			vm_page_t	*prev;

			for (prev = &this->next;
			     (this = *prev) != mem;
			     prev = &this->next)
				continue;
			*prev = this->next;
		}
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */
	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */
	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;
vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (bucket->pages == VM_PAGE_NULL) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else {
		vm_page_lookup_miss++;
	}

	return (mem);
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	assert(mem->object != new_object);

	/*
	 * ENCRYPTED SWAP:
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);

	vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
	*mem = vm_page_template;
	mem->phys_page = phys_page;
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

static vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}
vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}
/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}
	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 * vm_pool_low():
 *
 * Return true if it is not likely that a non-vm_privileged thread
 * can get memory without blocking.  Advisory only, since the
 * situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}

/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 0;
/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(mem->phys_page));

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 *	vm_page_grab:
 *
 *	first try to grab a page from the per-cpu free list...
 *	this must be done while pre-emption is disabled... if
 *	a page is available, we're done...
 *	if no page is available, grab the vm_page_queue_free_lock
 *	and see if current number of free pages would allow us
 *	to grab at least 1... if not, return VM_PAGE_NULL as before...
 *	if there are pages available, disable preemption and
 *	recheck the state of the per-cpu free list... we could
 *	have been preempted and moved to a different cpu, or
 *	some other thread could have re-filled it... if still
 *	empty, figure out how many pages we can steal from the
 *	global free queue and move to the per-cpu queue...
 *	return 1 of these pages when done... only wakeup the
 *	pageout_scan thread if we moved pages from the global
 *	list... no need for the wakeup if we've satisfied the
 *	request from the per-cpu queue.
 */

#define COLOR_GROUPS_TO_STEAL	4
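/*
 * Illustrative note (not part of the original source): when the per-cpu
 * list is empty, vm_page_grab() below tries to move
 * COLOR_GROUPS_TO_STEAL * vm_colors pages (e.g. 4 * 8 = 32 with 8 colors)
 * from the global per-color queues to the per-cpu list, clipped so the
 * global free count never drops below vm_page_free_reserved.
 */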
vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;

	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
		mem->pageq.next = NULL;

		enable_preemption();

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);

		assert(pmap_verify_free(mem->phys_page));

		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!pmap_is_noencrypt(mem->phys_page));

		return mem;
	}
	enable_preemption();

	/*
	 *	Optionally produce warnings if the wire or gobble
	 *	counts exceed some threshold.
	 */
	if (vm_page_wire_count_warning > 0
	    && vm_page_wire_count >= vm_page_wire_count_warning) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
			vm_page_wire_count);
		assert(vm_page_wire_count < vm_page_wire_count_warning);
	}
	if (vm_page_gobble_count_warning > 0
	    && vm_page_gobble_count >= vm_page_gobble_count_warning) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
			vm_page_gobble_count);
		assert(vm_page_gobble_count < vm_page_gobble_count_warning);
	}

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 *	Only let privileged threads (involved in pageout)
	 *	dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;
	}
	else {
		vm_page_t	head;
		vm_page_t	tail;
		unsigned int	color;
		unsigned int	pages_to_steal;

		while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else {
			pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;

			if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
				pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
		}
		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		while (pages_to_steal--) {
			if (--vm_page_free_count < vm_page_free_count_minimum)
				vm_page_free_count_minimum = vm_page_free_count;

			while (queue_empty(&vm_page_queue_free[color]))
				color = (color + 1) & vm_color_mask;

			queue_remove_first(&vm_page_queue_free[color],
					   mem,
					   vm_page_t,
					   pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			assert(!mem->active);
			assert(!mem->inactive);
			assert(!mem->throttled);
			assert(!mem->speculative);

			color = (color + 1) & vm_color_mask;

			if (head == NULL)
				head = mem;
			else
				tail->pageq.next = (queue_t)mem;
			tail = mem;

			mem->pageq.prev = NULL;
			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
			assert(mem->tabled == FALSE);
			assert(mem->object == VM_OBJECT_NULL);
			assert(!mem->laundry);

			assert(pmap_verify_free(mem->phys_page));

			assert(!mem->encrypted);
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
			assert(!pmap_is_noencrypt(mem->phys_page));
		}
		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		mem->pageq.next = NULL;

		lck_mtx_unlock(&vm_page_queue_free_lock);

		enable_preemption();
	}
	/*
	 *	Decide if we should poke the pageout daemon.
	 *	We do this if the free count is less than the low
	 *	water mark, or if the free count is less than the high
	 *	water mark (but above the low water mark) and the inactive
	 *	count is less than its target.
	 *
	 *	We don't have the counts locked ... if they change a little,
	 *	it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int	need_wakeup = 0;
	int	need_priv_wakeup = 0;

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(mem->phys_page);

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (mem->free)
		panic("vm_page_release");

	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem,
				  vm_page_t,
				  pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;
	} else {
		mem->lopage = FALSE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy thread runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */
		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);

		return (wait_result == THREAD_AWAKENED);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}
/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}


counter(unsigned int c_laundry_pages_freed = 0;)
/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);
	assert(!mem->pageout);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_throttle_up() before removing
		 * the page from its VM object, so that we can find out on
		 * which pageout queue the page is on.
		 */
		vm_pageout_throttle_up(mem);
		counter(++c_laundry_pages_freed);
	}
	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		if (mem->zero_fill == TRUE)
			VM_ZF_COUNT_DECR();
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}


void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
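/*
 * Illustrative sketch (not part of the build): batching callers such as
 * vm_pageout_scan() and vm_page_do_delayed_work() avoid taking the
 * free-list lock once per page by chaining reclaimed pages through
 * pageq.next into a local singly linked list and handing the whole
 * batch to vm_page_free_list():
 *
 *	vm_page_t	local_free_q = VM_PAGE_NULL;
 *
 *	   (for each page m being reclaimed, queues already prepared)
 *	vm_page_free_prepare_queues(m);
 *	m->pageq.next = (queue_entry_t) local_free_q;
 *	local_free_q = m;
 *
 *	   (once the batch is full)
 *	if (local_free_q != VM_PAGE_NULL)
 *		vm_page_free_list(local_free_q, TRUE);
 *
 * Passing TRUE asks vm_page_free_list() to run the
 * vm_page_free_prepare_object() step itself, as vm_page_do_delayed_work()
 * does later in this file.
 */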
2319 * Free a list of pages. The list can be up to several hundred pages,
2320 * as blocked up by vm_pageout_scan().
2321 * The big win is not having to take the free list lock once
2322 * per page. We sort the incoming pages into n lists, one for
2328 boolean_t prepare_object
)
2333 int inuse_list_head
= -1;
2335 queue_head_t free_list
[MAX_COLORS
];
2336 int inuse
[MAX_COLORS
];
2338 for (color
= 0; color
< (signed) vm_colors
; color
++) {
2339 queue_init(&free_list
[color
]);
2343 assert(!mem
->inactive
);
2344 assert(!mem
->active
);
2345 assert(!mem
->throttled
);
2347 assert(!mem
->speculative
);
2348 assert(!VM_PAGE_WIRED(mem
));
2349 assert(mem
->pageq
.prev
== NULL
);
2351 nxt
= (vm_page_t
)(mem
->pageq
.next
);
2353 if (prepare_object
== TRUE
)
2354 vm_page_free_prepare_object(mem
, TRUE
);
2356 if (vm_page_free_verify
&& !mem
->fictitious
&& !mem
->private) {
2357 assert(pmap_verify_free(mem
->phys_page
));
2360 if (!mem
->fictitious
) {
2362 if ((mem
->lopage
== TRUE
|| vm_lopage_refill
== TRUE
) &&
2363 vm_lopage_free_count
< vm_lopage_free_limit
&&
2364 mem
->phys_page
< max_valid_low_ppnum
) {
2365 mem
->pageq
.next
= NULL
;
2366 vm_page_release(mem
);
2370 * IMPORTANT: we can't set the page "free" here
2371 * because that would make the page eligible for
2372 * a physically-contiguous allocation (see
2373 * vm_page_find_contiguous()) right away (we don't
2374 * hold the vm_page_queue_free lock). That would
2375 * cause trouble because the page is not actually
2376 * in the free queue yet...
2378 color
= mem
->phys_page
& vm_color_mask
;
2379 if (queue_empty(&free_list
[color
])) {
2380 inuse
[color
] = inuse_list_head
;
2381 inuse_list_head
= color
;
2383 queue_enter_first(&free_list
[color
],
2389 pmap_clear_noencrypt(mem
->phys_page
);
2392 assert(mem
->phys_page
== vm_page_fictitious_addr
||
2393 mem
->phys_page
== vm_page_guard_addr
);
2394 vm_page_release_fictitious(mem
);
2399 unsigned int avail_free_count
;
2400 unsigned int need_wakeup
= 0;
2401 unsigned int need_priv_wakeup
= 0;
2403 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2405 color
= inuse_list_head
;
2407 while( color
!= -1 ) {
2408 vm_page_t first
, last
;
2409 vm_page_t first_free
;
2412 * Now that we hold the vm_page_queue_free lock,
2413 * it's safe to mark all pages in our local queue
2416 queue_iterate(&free_list
[color
],
2426 * ... and insert our local queue at the head of
2427 * the global free queue.
2429 first
= (vm_page_t
) queue_first(&free_list
[color
]);
2430 last
= (vm_page_t
) queue_last(&free_list
[color
]);
2431 first_free
= (vm_page_t
) queue_first(&vm_page_queue_free
[color
]);
2432 if (queue_empty(&vm_page_queue_free
[color
])) {
2433 queue_last(&vm_page_queue_free
[color
]) =
2434 (queue_entry_t
) last
;
2436 queue_prev(&first_free
->pageq
) =
2437 (queue_entry_t
) last
;
2439 queue_first(&vm_page_queue_free
[color
]) =
2440 (queue_entry_t
) first
;
2441 queue_prev(&first
->pageq
) =
2442 (queue_entry_t
) &vm_page_queue_free
[color
];
2443 queue_next(&last
->pageq
) =
2444 (queue_entry_t
) first_free
;
2447 color
= inuse
[color
];
2450 vm_page_free_count
+= pg_count
;
2451 avail_free_count
= vm_page_free_count
;
2453 if (vm_page_free_wanted_privileged
> 0 &&
2454 avail_free_count
> 0) {
2455 if (avail_free_count
< vm_page_free_wanted_privileged
) {
2456 need_priv_wakeup
= avail_free_count
;
2457 vm_page_free_wanted_privileged
-=
2459 avail_free_count
= 0;
2461 need_priv_wakeup
= vm_page_free_wanted_privileged
;
2462 vm_page_free_wanted_privileged
= 0;
2464 vm_page_free_wanted_privileged
;
2468 if (vm_page_free_wanted
> 0 &&
2469 avail_free_count
> vm_page_free_reserved
) {
2470 unsigned int available_pages
;
2472 available_pages
= (avail_free_count
-
2473 vm_page_free_reserved
);
2475 if (available_pages
>= vm_page_free_wanted
) {
2476 need_wakeup
= vm_page_free_wanted
;
2477 vm_page_free_wanted
= 0;
2479 need_wakeup
= available_pages
;
2480 vm_page_free_wanted
-= available_pages
;
2483 lck_mtx_unlock(&vm_page_queue_free_lock
);
2485 if (need_priv_wakeup
!= 0) {
2487 * There shouldn't be that many VM-privileged threads,
2488 * so let's wake them all up, even if we don't quite
2489 * have enough pages to satisfy them all.
2491 thread_wakeup((event_t
)&vm_page_free_wanted_privileged
);
2493 if (need_wakeup
!= 0 && vm_page_free_wanted
== 0) {
2495 * We don't expect to have any more waiters
2496 * after this, so let's wake them all up at
2499 thread_wakeup((event_t
) &vm_page_free_count
);
2500 } else for (; need_wakeup
!= 0; need_wakeup
--) {
2502 * Wake up one waiter per page we just released.
2504 thread_wakeup_one((event_t
) &vm_page_free_count
);
2507 VM_CHECK_MEMORYSTATUS
;
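/*
 * Note on the free-queue "colors" used above: a page's bucket is simply
 * its physical page number masked by vm_color_mask,
 *
 *	color = mem->phys_page & vm_color_mask;
 *
 * so physically consecutive pages fall into different buckets, and
 * handing out pages round-robin across the buckets spreads allocations
 * across cache sets.  (Restates the code above for clarity; no new
 * behavior.)
 */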
2515 * Mark this page as wired down by yet
2516 * another map, removing it from paging queues
2519 * The page's object and the page queues must be locked.
2523 register vm_page_t mem
)
2526 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2530 vm_object_lock_assert_exclusive(mem
->object
);
2533 * In theory, the page should be in an object before it
2534 * gets wired, since we need to hold the object lock
2535 * to update some fields in the page structure.
2536 * However, some code (i386 pmap, for example) might want
2537 * to wire a page before it gets inserted into an object.
2538 * That's somewhat OK, as long as nobody else can get to
2539 * that page and update it at the same time.
2543 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2545 if ( !VM_PAGE_WIRED(mem
)) {
2546 VM_PAGE_QUEUES_REMOVE(mem
);
2549 mem
->object
->wired_page_count
++;
2550 assert(mem
->object
->resident_page_count
>=
2551 mem
->object
->wired_page_count
);
2552 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
2553 assert(vm_page_purgeable_count
> 0);
2554 OSAddAtomic(-1, &vm_page_purgeable_count
);
2555 OSAddAtomic(1, &vm_page_purgeable_wired_count
);
2557 if (mem
->object
->all_reusable
) {
2559 * Wired pages are not counted as "re-usable"
2560 * in "all_reusable" VM objects, so nothing
2563 } else if (mem
->reusable
) {
2565 * This page is not "re-usable" when it's
2566 * wired, so adjust its state and the
2569 vm_object_reuse_pages(mem
->object
,
2571 mem
->offset
+PAGE_SIZE_64
,
2575 assert(!mem
->reusable
);
2577 if (!mem
->private && !mem
->fictitious
&& !mem
->gobbled
)
2578 vm_page_wire_count
++;
2580 vm_page_gobble_count
--;
2581 mem
->gobbled
= FALSE
;
2582 if (mem
->zero_fill
== TRUE
) {
2583 mem
->zero_fill
= FALSE
;
2587 VM_CHECK_MEMORYSTATUS
;
2591 * The page could be encrypted, but
2592 * We don't have to decrypt it here
2593 * because we don't guarantee that the
2594 * data is actually valid at this point.
2595 * The page will get decrypted in
2596 * vm_fault_wire() if needed.
2599 assert(!mem
->gobbled
);
2607 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2609 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2613 register vm_page_t mem
)
2615 vm_page_lockspin_queues();
2618 assert(!mem
->gobbled
);
2619 assert( !VM_PAGE_WIRED(mem
));
2621 if (!mem
->gobbled
&& !VM_PAGE_WIRED(mem
)) {
2622 if (!mem
->private && !mem
->fictitious
)
2623 vm_page_wire_count
++;
2625 vm_page_gobble_count
++;
2626 mem
->gobbled
= TRUE
;
2627 vm_page_unlock_queues();
2633 * Release one wiring of this page, potentially
2634 * enabling it to be paged again.
2636 * The page's object and the page queues must be locked.
2644 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2647 assert(VM_PAGE_WIRED(mem
));
2648 assert(mem
->object
!= VM_OBJECT_NULL
);
2650 vm_object_lock_assert_exclusive(mem
->object
);
2651 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2653 if (--mem
->wire_count
== 0) {
2654 assert(!mem
->private && !mem
->fictitious
);
2655 vm_page_wire_count
--;
2656 assert(mem
->object
->wired_page_count
> 0);
2657 mem
->object
->wired_page_count
--;
2658 assert(mem
->object
->resident_page_count
>=
2659 mem
->object
->wired_page_count
);
2660 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
2661 OSAddAtomic(+1, &vm_page_purgeable_count
);
2662 assert(vm_page_purgeable_wired_count
> 0);
2663 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
2665 assert(!mem
->laundry
);
2666 assert(mem
->object
!= kernel_object
);
2667 assert(mem
->pageq
.next
== NULL
&& mem
->pageq
.prev
== NULL
);
2669 if (queueit
== TRUE
) {
2670 if (mem
->object
->purgable
== VM_PURGABLE_EMPTY
) {
2671 vm_page_deactivate(mem
);
2673 vm_page_activate(mem
);
2677 VM_CHECK_MEMORYSTATUS
;
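/*
 * Illustrative sketch (not part of the build): vm_page_wire() and
 * vm_page_unwire() are used in matched pairs, with the page's object
 * locked for the whole sequence and the page queues locked around each
 * call:
 *
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);		 (wire_count goes 0 -> 1)
 *	vm_page_unlock_queues();
 *
 *	   ... the page is now pinned for I/O or kernel use ...
 *
 *	vm_page_lockspin_queues();
 *	vm_page_unwire(m, TRUE);	 (wire_count goes 1 -> 0, page re-queued)
 *	vm_page_unlock_queues();
 *
 * The invariants maintained: vm_page_wire_count counts wired pages that
 * are neither private nor fictitious, and an object's wired_page_count
 * never exceeds its resident_page_count (both are asserted above).
 */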
2684 * vm_page_deactivate:
2686 * Returns the given page to the inactive list,
2687 * indicating that no physical maps have access
2688 * to this page. [Used by the physical mapping system.]
2690 * The page queues must be locked.
2696 vm_page_deactivate_internal(m
, TRUE
);
2701 vm_page_deactivate_internal(
2703 boolean_t clear_hw_reference
)
2707 assert(m
->object
!= kernel_object
);
2708 assert(m
->phys_page
!= vm_page_guard_addr
);
2710 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2712 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2715 * This page is no longer very interesting. If it was
2716 * interesting (active or inactive/referenced), then we
2717 * clear the reference bit and (re)enter it in the
2718 * inactive queue. Note wired pages should not have
2719 * their reference bit cleared.
2721 assert ( !(m
->absent
&& !m
->unusual
));
2723 if (m
->gobbled
) { /* can this happen? */
2724 assert( !VM_PAGE_WIRED(m
));
2726 if (!m
->private && !m
->fictitious
)
2727 vm_page_wire_count
--;
2728 vm_page_gobble_count
--;
2731 if (m
->private || m
->fictitious
|| (VM_PAGE_WIRED(m
)))
2734 if (!m
->absent
&& clear_hw_reference
== TRUE
)
2735 pmap_clear_reference(m
->phys_page
);
2737 m
->reference
= FALSE
;
2738 m
->no_cache
= FALSE
;
2741 VM_PAGE_QUEUES_REMOVE(m
);
2743 assert(!m
->laundry
);
2744 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
2746 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) &&
2747 m
->dirty
&& m
->object
->internal
&&
2748 (m
->object
->purgable
== VM_PURGABLE_DENY
||
2749 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
2750 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
2751 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
2752 m
->throttled
= TRUE
;
2753 vm_page_throttled_count
++;
2755 if (m
->object
->named
&& m
->object
->ref_count
== 1) {
2756 vm_page_speculate(m
, FALSE
);
2757 #if DEVELOPMENT || DEBUG
2758 vm_page_speculative_recreated
++;
2761 VM_PAGE_ENQUEUE_INACTIVE(m
, FALSE
);
2770 * Put the specified page on the active list (if appropriate).
2772 * The page queues must be locked.
2777 register vm_page_t m
)
2780 #ifdef FIXME_4778297
2781 assert(m
->object
!= kernel_object
);
2783 assert(m
->phys_page
!= vm_page_guard_addr
);
2785 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2787 assert( !(m
->absent
&& !m
->unusual
));
2790 assert( !VM_PAGE_WIRED(m
));
2791 if (!m
->private && !m
->fictitious
)
2792 vm_page_wire_count
--;
2793 vm_page_gobble_count
--;
2796 if (m
->private || m
->fictitious
)
2801 panic("vm_page_activate: already active");
2804 if (m
->speculative
) {
2805 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
2806 DTRACE_VM2(pgfrec
, int, 1, (uint64_t *), NULL
);
2809 VM_PAGE_QUEUES_REMOVE(m
);
2811 if ( !VM_PAGE_WIRED(m
)) {
2812 assert(!m
->laundry
);
2813 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
2814 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) &&
2815 m
->dirty
&& m
->object
->internal
&&
2816 (m
->object
->purgable
== VM_PURGABLE_DENY
||
2817 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
2818 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
2819 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
2820 m
->throttled
= TRUE
;
2821 vm_page_throttled_count
++;
2823 queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
2825 vm_page_active_count
++;
2827 m
->reference
= TRUE
;
2828 m
->no_cache
= FALSE
;
2835 * vm_page_speculate:
2837 * Put the specified page on the speculative list (if appropriate).
2839 * The page queues must be locked.
2846 struct vm_speculative_age_q
*aq
;
2849 assert(m
->object
!= kernel_object
);
2850 assert(m
->phys_page
!= vm_page_guard_addr
);
2852 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2854 assert( !(m
->absent
&& !m
->unusual
));
2856 if (m
->private || m
->fictitious
)
2859 VM_PAGE_QUEUES_REMOVE(m
);
2861 if ( !VM_PAGE_WIRED(m
)) {
2866 clock_get_system_nanotime(&sec
, &nsec
);
2867 ts
.tv_sec
= (unsigned int) sec
;
2870 if (vm_page_speculative_count
== 0) {
2872 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
2873 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
2875 aq
= &vm_page_queue_speculative
[speculative_age_index
];
2878 * set the timer to begin a new group
2880 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
2881 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
2883 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
2885 aq
= &vm_page_queue_speculative
[speculative_age_index
];
2887 if (CMP_MACH_TIMESPEC(&ts
, &aq
->age_ts
) >= 0) {
2889 speculative_age_index
++;
2891 if (speculative_age_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
2892 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
2893 if (speculative_age_index
== speculative_steal_index
) {
2894 speculative_steal_index
= speculative_age_index
+ 1;
2896 if (speculative_steal_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
2897 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
2899 aq
= &vm_page_queue_speculative
[speculative_age_index
];
2901 if (!queue_empty(&aq
->age_q
))
2902 vm_page_speculate_ageit(aq
);
2904 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
2905 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
2907 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
2910 enqueue_tail(&aq
->age_q
, &m
->pageq
);
2911 m
->speculative
= TRUE
;
2912 vm_page_speculative_count
++;
2915 vm_object_lock_assert_exclusive(m
->object
);
2917 m
->object
->pages_created
++;
2918 #if DEVELOPMENT || DEBUG
2919 vm_page_speculative_created
++;
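/*
 * Worked example of the age-queue timestamp arithmetic above (numbers
 * are illustrative): with vm_page_speculative_q_age_ms = 500 and the
 * current time ts = 12.3s, the bucket's expiry is set to
 *
 *	age_ts.tv_sec  = 500 / 1000 = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000
 *	age_ts += ts   =>  expires at t = 12.8s
 *
 * Once the clock passes age_ts, newly speculated pages start a fresh
 * bucket and speculative_age_index advances, wrapping within
 * [VM_PAGE_MIN_SPECULATIVE_AGE_Q, VM_PAGE_MAX_SPECULATIVE_AGE_Q].
 */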
2928 * move pages from the specified aging bin to
2929 * the speculative bin that pageout_scan claims from
2931 * The page queues must be locked.
2934 vm_page_speculate_ageit(struct vm_speculative_age_q
*aq
)
2936 struct vm_speculative_age_q
*sq
;
2939 sq
= &vm_page_queue_speculative
[VM_PAGE_SPECULATIVE_AGED_Q
];
2941 if (queue_empty(&sq
->age_q
)) {
2942 sq
->age_q
.next
= aq
->age_q
.next
;
2943 sq
->age_q
.prev
= aq
->age_q
.prev
;
2945 t
= (vm_page_t
)sq
->age_q
.next
;
2946 t
->pageq
.prev
= &sq
->age_q
;
2948 t
= (vm_page_t
)sq
->age_q
.prev
;
2949 t
->pageq
.next
= &sq
->age_q
;
2951 t
= (vm_page_t
)sq
->age_q
.prev
;
2952 t
->pageq
.next
= aq
->age_q
.next
;
2954 t
= (vm_page_t
)aq
->age_q
.next
;
2955 t
->pageq
.prev
= sq
->age_q
.prev
;
2957 t
= (vm_page_t
)aq
->age_q
.prev
;
2958 t
->pageq
.next
= &sq
->age_q
;
2960 sq
->age_q
.prev
= aq
->age_q
.prev
;
2962 queue_init(&aq
->age_q
);
2971 assert(m
->object
!= kernel_object
);
2972 assert(m
->phys_page
!= vm_page_guard_addr
);
2975 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2977 if (m
->active
|| m
->reference
)
2980 if (m
->private || (VM_PAGE_WIRED(m
)))
2983 m
->no_cache
= FALSE
;
2985 VM_PAGE_QUEUES_REMOVE(m
);
2987 assert(!m
->laundry
);
2988 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
2990 VM_PAGE_ENQUEUE_INACTIVE(m
, FALSE
);
2995 vm_page_reactivate_all_throttled(void)
2997 vm_page_t first_throttled
, last_throttled
;
2998 vm_page_t first_active
;
3000 int extra_active_count
;
3002 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
))
3005 extra_active_count
= 0;
3006 vm_page_lock_queues();
3007 if (! queue_empty(&vm_page_queue_throttled
)) {
3009 * Switch "throttled" pages to "active".
3011 queue_iterate(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
) {
3013 assert(m
->throttled
);
3015 assert(!m
->inactive
);
3016 assert(!m
->speculative
);
3017 assert(!VM_PAGE_WIRED(m
));
3019 extra_active_count
++;
3021 m
->throttled
= FALSE
;
3027 * Transfer the entire throttled queue to a regular LRU page queues.
3028 * We insert it at the head of the active queue, so that these pages
3029 * get re-evaluated by the LRU algorithm first, since they've been
3030 * completely out of it until now.
3032 first_throttled
= (vm_page_t
) queue_first(&vm_page_queue_throttled
);
3033 last_throttled
= (vm_page_t
) queue_last(&vm_page_queue_throttled
);
3034 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3035 if (queue_empty(&vm_page_queue_active
)) {
3036 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_throttled
;
3038 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_throttled
;
3040 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_throttled
;
3041 queue_prev(&first_throttled
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3042 queue_next(&last_throttled
->pageq
) = (queue_entry_t
) first_active
;
3045 printf("reactivated %d throttled pages\n", vm_page_throttled_count
);
3047 queue_init(&vm_page_queue_throttled
);
3049 * Adjust the global page counts.
3051 vm_page_active_count
+= extra_active_count
;
3052 vm_page_throttled_count
= 0;
3054 assert(vm_page_throttled_count
== 0);
3055 assert(queue_empty(&vm_page_queue_throttled
));
3056 vm_page_unlock_queues();
3061 * move pages from the indicated local queue to the global active queue
3062 * its ok to fail if we're below the hard limit and force == FALSE
3063 * the nolocks == TRUE case is to allow this function to be run on
3064 * the hibernate path
3068 vm_page_reactivate_local(uint32_t lid
, boolean_t force
, boolean_t nolocks
)
3071 vm_page_t first_local
, last_local
;
3072 vm_page_t first_active
;
3076 if (vm_page_local_q
== NULL
)
3079 lq
= &vm_page_local_q
[lid
].vpl_un
.vpl
;
3081 if (nolocks
== FALSE
) {
3082 if (lq
->vpl_count
< vm_page_local_q_hard_limit
&& force
== FALSE
) {
3083 if ( !vm_page_trylockspin_queues())
3086 vm_page_lockspin_queues();
3088 VPL_LOCK(&lq
->vpl_lock
);
3090 if (lq
->vpl_count
) {
3092 * Switch "local" pages to "active".
3094 assert(!queue_empty(&lq
->vpl_queue
));
3096 queue_iterate(&lq
->vpl_queue
, m
, vm_page_t
, pageq
) {
3100 assert(!m
->inactive
);
3101 assert(!m
->speculative
);
3102 assert(!VM_PAGE_WIRED(m
));
3103 assert(!m
->throttled
);
3104 assert(!m
->fictitious
);
3106 if (m
->local_id
!= lid
)
3107 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m
);
3116 if (count
!= lq
->vpl_count
)
3117 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count
, lq
->vpl_count
);
3120 * Transfer the entire local queue to a regular LRU page queues.
3122 first_local
= (vm_page_t
) queue_first(&lq
->vpl_queue
);
3123 last_local
= (vm_page_t
) queue_last(&lq
->vpl_queue
);
3124 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3126 if (queue_empty(&vm_page_queue_active
)) {
3127 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_local
;
3129 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_local
;
3131 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_local
;
3132 queue_prev(&first_local
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3133 queue_next(&last_local
->pageq
) = (queue_entry_t
) first_active
;
3135 queue_init(&lq
->vpl_queue
);
3137 * Adjust the global page counts.
3139 vm_page_active_count
+= lq
->vpl_count
;
3142 assert(queue_empty(&lq
->vpl_queue
));
3144 if (nolocks
== FALSE
) {
3145 VPL_UNLOCK(&lq
->vpl_lock
);
3146 vm_page_unlock_queues();
3151 * vm_page_part_zero_fill:
3153 * Zero-fill a part of the page.
3156 vm_page_part_zero_fill(
3164 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3165 pmap_zero_part_page(m
->phys_page
, m_pa
, len
);
3168 tmp
= vm_page_grab();
3169 if (tmp
== VM_PAGE_NULL
) {
3170 vm_page_wait(THREAD_UNINT
);
3175 vm_page_zero_fill(tmp
);
3177 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
3179 if((m_pa
+ len
) < PAGE_SIZE
) {
3180 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
3181 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
3183 vm_page_copy(tmp
,m
);
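/*
 * Note on the fallback path above (no PMAP_ZERO_PART_PAGE_IMPLEMENTED):
 * because the pmap layer cannot zero a sub-range in place, the routine
 * builds the result in a scratch page and copies it back:
 *
 *	scratch[0 .. m_pa)              copied from m
 *	scratch[m_pa .. m_pa+len)       left zero-filled
 *	scratch[m_pa+len .. PAGE_SIZE)  copied from m
 *
 * then vm_page_copy(tmp, m) writes the scratch page back over m.
 */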
3190 * vm_page_zero_fill:
3192 * Zero-fill the specified page.
3199 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3200 m
->object
, m
->offset
, m
, 0,0);
3204 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3205 pmap_zero_page(m
->phys_page
);
3209 * vm_page_part_copy:
3211 * copy part of one page to another
3222 VM_PAGE_CHECK(src_m
);
3223 VM_PAGE_CHECK(dst_m
);
3225 pmap_copy_part_page(src_m
->phys_page
, src_pa
,
3226 dst_m
->phys_page
, dst_pa
, len
);
3232 * Copy one page to another
3235 * The source page should not be encrypted. The caller should
3236 * make sure the page is decrypted first, if necessary.
3239 int vm_page_copy_cs_validations
= 0;
3240 int vm_page_copy_cs_tainted
= 0;
3248 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3249 src_m
->object
, src_m
->offset
,
3250 dest_m
->object
, dest_m
->offset
,
3253 VM_PAGE_CHECK(src_m
);
3254 VM_PAGE_CHECK(dest_m
);
3258 * The source page should not be encrypted at this point.
3259 * The destination page will therefore not contain encrypted
3260 * data after the copy.
3262 if (src_m
->encrypted
) {
3263 panic("vm_page_copy: source page %p is encrypted\n", src_m
);
3265 dest_m
->encrypted
= FALSE
;
3267 if (src_m
->object
!= VM_OBJECT_NULL
&&
3268 src_m
->object
->code_signed
) {
3270 * We're copying a page from a code-signed object.
3271 * Whoever ends up mapping the copy page might care about
3272 * the original page's integrity, so let's validate the
3275 vm_page_copy_cs_validations
++;
3276 vm_page_validate_cs(src_m
);
3279 if (vm_page_is_slideable(src_m
)) {
3280 boolean_t was_busy
= src_m
->busy
;
3282 (void) vm_page_slide(src_m
, 0);
3283 assert(src_m
->busy
);
3285 PAGE_WAKEUP_DONE(src_m
);
3290 * Propagate the cs_tainted bit to the copy page. Do not propagate
3291 * the cs_validated bit.
3293 dest_m
->cs_tainted
= src_m
->cs_tainted
;
3294 if (dest_m
->cs_tainted
) {
3295 vm_page_copy_cs_tainted
++;
3297 dest_m
->slid
= src_m
->slid
;
3298 dest_m
->error
= src_m
->error
; /* sliding src_m might have failed... */
3299 pmap_copy_page(src_m
->phys_page
, dest_m
->phys_page
);
3307 printf("vm_page %p: \n", p
);
3308 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3309 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3310 printf(" next=%p\n", p
->next
);
3311 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3312 printf(" wire_count=%u\n", p
->wire_count
);
3314 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3315 (p
->local
? "" : "!"),
3316 (p
->inactive
? "" : "!"),
3317 (p
->active
? "" : "!"),
3318 (p
->pageout_queue
? "" : "!"),
3319 (p
->speculative
? "" : "!"),
3320 (p
->laundry
? "" : "!"));
3321 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3322 (p
->free
? "" : "!"),
3323 (p
->reference
? "" : "!"),
3324 (p
->gobbled
? "" : "!"),
3325 (p
->private ? "" : "!"),
3326 (p
->throttled
? "" : "!"));
3327 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3328 (p
->busy
? "" : "!"),
3329 (p
->wanted
? "" : "!"),
3330 (p
->tabled
? "" : "!"),
3331 (p
->fictitious
? "" : "!"),
3332 (p
->pmapped
? "" : "!"),
3333 (p
->wpmapped
? "" : "!"));
3334 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3335 (p
->pageout
? "" : "!"),
3336 (p
->absent
? "" : "!"),
3337 (p
->error
? "" : "!"),
3338 (p
->dirty
? "" : "!"),
3339 (p
->cleaning
? "" : "!"),
3340 (p
->precious
? "" : "!"),
3341 (p
->clustered
? "" : "!"));
3342 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3343 (p
->overwriting
? "" : "!"),
3344 (p
->restart
? "" : "!"),
3345 (p
->unusual
? "" : "!"),
3346 (p
->encrypted
? "" : "!"),
3347 (p
->encrypted_cleaning
? "" : "!"));
3348 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3349 (p
->list_req_pending
? "" : "!"),
3350 (p
->dump_cleaning
? "" : "!"),
3351 (p
->cs_validated
? "" : "!"),
3352 (p
->cs_tainted
? "" : "!"),
3353 (p
->no_cache
? "" : "!"));
3354 printf(" %szero_fill\n",
3355 (p
->zero_fill
? "" : "!"));
3357 printf("phys_page=0x%x\n", p
->phys_page
);
3361 * Check that the list of pages is ordered by
3362 * ascending physical address and has no holes.
3365 vm_page_verify_contiguous(
3367 unsigned int npages
)
3369 register vm_page_t m
;
3370 unsigned int page_count
;
3371 vm_offset_t prev_addr
;
3373 prev_addr
= pages
->phys_page
;
3375 for (m
= NEXT_PAGE(pages
); m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
3376 if (m
->phys_page
!= prev_addr
+ 1) {
3377 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3378 m
, (long)prev_addr
, m
->phys_page
);
3379 printf("pages %p page_count %d npages %d\n", pages
, page_count
, npages
);
3380 panic("vm_page_verify_contiguous: not contiguous!");
3382 prev_addr
= m
->phys_page
;
3385 if (page_count
!= npages
) {
3386 printf("pages %p actual count 0x%x but requested 0x%x\n",
3387 pages
, page_count
, npages
);
3388 panic("vm_page_verify_contiguous: count error");
3395 * Check the free lists for proper length etc.
3398 vm_page_verify_free_list(
3399 queue_head_t
*vm_page_queue
,
3401 vm_page_t look_for_page
,
3402 boolean_t expect_page
)
3404 unsigned int npages
;
3407 boolean_t found_page
;
3411 prev_m
= (vm_page_t
) vm_page_queue
;
3412 queue_iterate(vm_page_queue
,
3417 if (m
== look_for_page
) {
3420 if ((vm_page_t
) m
->pageq
.prev
!= prev_m
)
3421 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3422 color
, npages
, m
, m
->pageq
.prev
, prev_m
);
3424 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3426 if (color
!= (unsigned int) -1) {
3427 if ((m
->phys_page
& vm_color_mask
) != color
)
3428 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3429 color
, npages
, m
, m
->phys_page
& vm_color_mask
, color
);
3431 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3437 if (look_for_page
!= VM_PAGE_NULL
) {
3438 unsigned int other_color
;
3440 if (expect_page
&& !found_page
) {
3441 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3442 color
, npages
, look_for_page
, look_for_page
->phys_page
);
3443 _vm_page_print(look_for_page
);
3444 for (other_color
= 0;
3445 other_color
< vm_colors
;
3447 if (other_color
== color
)
3449 vm_page_verify_free_list(&vm_page_queue_free
[other_color
],
3450 other_color
, look_for_page
, FALSE
);
3452 if (color
== (unsigned int) -1) {
3453 vm_page_verify_free_list(&vm_lopage_queue_free
,
3454 (unsigned int) -1, look_for_page
, FALSE
);
3456 panic("vm_page_verify_free_list(color=%u)\n", color
);
3458 if (!expect_page
&& found_page
) {
3459 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3460 color
, npages
, look_for_page
, look_for_page
->phys_page
);
3466 static boolean_t vm_page_verify_free_lists_enabled
= FALSE
;
3468 vm_page_verify_free_lists( void )
3470 unsigned int color
, npages
, nlopages
;
3472 if (! vm_page_verify_free_lists_enabled
)
3477 lck_mtx_lock(&vm_page_queue_free_lock
);
3479 for( color
= 0; color
< vm_colors
; color
++ ) {
3480 npages
+= vm_page_verify_free_list(&vm_page_queue_free
[color
],
3481 color
, VM_PAGE_NULL
, FALSE
);
3483 nlopages
= vm_page_verify_free_list(&vm_lopage_queue_free
,
3485 VM_PAGE_NULL
, FALSE
);
3486 if (npages
!= vm_page_free_count
|| nlopages
!= vm_lopage_free_count
)
3487 panic("vm_page_verify_free_lists: "
3488 "npages %u free_count %d nlopages %u lo_free_count %u",
3489 npages
, vm_page_free_count
, nlopages
, vm_lopage_free_count
);
3491 lck_mtx_unlock(&vm_page_queue_free_lock
);
3495 vm_page_queues_assert(
3499 if (mem
->free
+ mem
->active
+ mem
->inactive
+ mem
->speculative
+
3500 mem
->throttled
+ mem
->pageout_queue
> (val
)) {
3501 _vm_page_print(mem
);
3502 panic("vm_page_queues_assert(%p, %d)\n", mem
, val
);
3504 if (VM_PAGE_WIRED(mem
)) {
3505 assert(!mem
->active
);
3506 assert(!mem
->inactive
);
3507 assert(!mem
->speculative
);
3508 assert(!mem
->throttled
);
3511 #endif /* MACH_ASSERT */
3515 * CONTIGUOUS PAGE ALLOCATION
3517 * Find a region large enough to contain at least n pages
3518 * of contiguous physical memory.
3520 * This is done by traversing the vm_page_t array in a linear fashion
3521 * we assume that the vm_page_t array has the avaiable physical pages in an
3522 * ordered, ascending list... this is currently true of all our implementations
3523 * and must remain so... there can be 'holes' in the array... we also can
3524 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3525 * which use to happen via 'vm_page_convert'... that function was no longer
3526 * being called and was removed...
3528 * The basic flow consists of stabilizing some of the interesting state of
3529 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3530 * sweep at the beginning of the array looking for pages that meet our criterea
3531 * for a 'stealable' page... currently we are pretty conservative... if the page
3532 * meets this criterea and is physically contiguous to the previous page in the 'run'
3533 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3534 * and start to develop a new run... if at this point we've already considered
3535 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3536 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3537 * to other threads trying to acquire free pages (or move pages from q to q),
3538 * and then continue from the spot we left off... we only make 1 pass through the
3539 * array. Once we have a 'run' that is long enough, we'll go into the loop which
3540 * which steals the pages from the queues they're currently on... pages on the free
3541 * queue can be stolen directly... pages that are on any of the other queues
3542 * must be removed from the object they are tabled on... this requires taking the
3543 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3544 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3545 * dump the pages we've currently stolen back to the free list, and pick up our
3546 * scan from the point where we aborted the 'current' run.
3550 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3552 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3557 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3560 #define RESET_STATE_OF_RUN() \
3562 prevcontaddr = -2; \
3564 free_considered = 0; \
3565 substitute_needed = 0; \
3570 * Can we steal in-use (i.e. not free) pages when searching for
3571 * physically-contiguous pages ?
3573 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3575 static unsigned int vm_page_find_contiguous_last_idx
= 0, vm_page_lomem_find_contiguous_last_idx
= 0;
3577 int vm_page_find_contig_debug
= 0;
3581 vm_page_find_contiguous(
3582 unsigned int contig_pages
,
3589 ppnum_t prevcontaddr
;
3591 unsigned int npages
, considered
, scanned
;
3592 unsigned int page_idx
, start_idx
, last_idx
, orig_last_idx
;
3593 unsigned int idx_last_contig_page_found
= 0;
3594 int free_considered
, free_available
;
3595 int substitute_needed
;
3598 clock_sec_t tv_start_sec
, tv_end_sec
;
3599 clock_usec_t tv_start_usec
, tv_end_usec
;
3604 int stolen_pages
= 0;
3607 if (contig_pages
== 0)
3608 return VM_PAGE_NULL
;
3611 vm_page_verify_free_lists();
3614 clock_get_system_microtime(&tv_start_sec
, &tv_start_usec
);
3616 vm_page_lock_queues();
3617 lck_mtx_lock(&vm_page_queue_free_lock
);
3619 RESET_STATE_OF_RUN();
3623 free_available
= vm_page_free_count
- vm_page_free_reserved
;
3627 if(flags
& KMA_LOMEM
)
3628 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
;
3630 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
;
3632 orig_last_idx
= idx_last_contig_page_found
;
3633 last_idx
= orig_last_idx
;
3635 for (page_idx
= last_idx
, start_idx
= last_idx
;
3636 npages
< contig_pages
&& page_idx
< vm_pages_count
;
3641 page_idx
>= orig_last_idx
) {
3643 * We're back where we started and we haven't
3644 * found any suitable contiguous range. Let's
3650 m
= &vm_pages
[page_idx
];
3652 assert(!m
->fictitious
);
3653 assert(!m
->private);
3655 if (max_pnum
&& m
->phys_page
> max_pnum
) {
3656 /* no more low pages... */
3659 if (!npages
& ((m
->phys_page
& pnum_mask
) != 0)) {
3663 RESET_STATE_OF_RUN();
3665 } else if (VM_PAGE_WIRED(m
) || m
->gobbled
||
3666 m
->encrypted
|| m
->encrypted_cleaning
|| m
->cs_validated
|| m
->cs_tainted
||
3667 m
->error
|| m
->absent
|| m
->pageout_queue
|| m
->laundry
|| m
->wanted
|| m
->precious
||
3668 m
->cleaning
|| m
->overwriting
|| m
->restart
|| m
->unusual
|| m
->list_req_pending
||
3671 * page is in a transient state
3672 * or a state we don't want to deal
3673 * with, so don't consider it which
3674 * means starting a new run
3676 RESET_STATE_OF_RUN();
3678 } else if (!m
->free
&& !m
->active
&& !m
->inactive
&& !m
->speculative
&& !m
->throttled
) {
3680 * page needs to be on one of our queues
3681 * in order for it to be stable behind the
3682 * locks we hold at this point...
3683 * if not, don't consider it which
3684 * means starting a new run
3686 RESET_STATE_OF_RUN();
3688 } else if (!m
->free
&& (!m
->tabled
|| m
->busy
)) {
3690 * pages on the free list are always 'busy'
3691 * so we couldn't test for 'busy' in the check
3692 * for the transient states... pages that are
3693 * 'free' are never 'tabled', so we also couldn't
3694 * test for 'tabled'. So we check here to make
3695 * sure that a non-free page is not busy and is
3696 * tabled on an object...
3697 * if not, don't consider it which
3698 * means starting a new run
3700 RESET_STATE_OF_RUN();
3703 if (m
->phys_page
!= prevcontaddr
+ 1) {
3704 if ((m
->phys_page
& pnum_mask
) != 0) {
3705 RESET_STATE_OF_RUN();
3709 start_idx
= page_idx
;
3710 start_pnum
= m
->phys_page
;
3715 prevcontaddr
= m
->phys_page
;
3722 * This page is not free.
3723 * If we can't steal used pages,
3724 * we have to give up this run
3726 * Otherwise, we might need to
3727 * move the contents of this page
3728 * into a substitute page.
3730 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3731 if (m
->pmapped
|| m
->dirty
) {
3732 substitute_needed
++;
3735 RESET_STATE_OF_RUN();
3739 if ((free_considered
+ substitute_needed
) > free_available
) {
3741 * if we let this run continue
3742 * we will end up dropping the vm_page_free_count
3743 * below the reserve limit... we need to abort
3744 * this run, but we can at least re-consider this
3745 * page... thus the jump back to 'retry'
3747 RESET_STATE_OF_RUN();
3749 if (free_available
&& considered
<= MAX_CONSIDERED_BEFORE_YIELD
) {
3754 * free_available == 0
3755 * so can't consider any free pages... if
3756 * we went to retry in this case, we'd
3757 * get stuck looking at the same page
3758 * w/o making any forward progress
3759 * we also want to take this path if we've already
3760 * reached our limit that controls the lock latency
3765 if (considered
> MAX_CONSIDERED_BEFORE_YIELD
&& npages
<= 1) {
3767 lck_mtx_unlock(&vm_page_queue_free_lock
);
3768 vm_page_unlock_queues();
3772 vm_page_lock_queues();
3773 lck_mtx_lock(&vm_page_queue_free_lock
);
3775 RESET_STATE_OF_RUN();
3777 * reset our free page limit since we
3778 * dropped the lock protecting the vm_page_free_queue
3780 free_available
= vm_page_free_count
- vm_page_free_reserved
;
3791 if (npages
!= contig_pages
) {
3794 * We didn't find a contiguous range but we didn't
3795 * start from the very first page.
3796 * Start again from the very first page.
3798 RESET_STATE_OF_RUN();
3799 if( flags
& KMA_LOMEM
)
3800 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= 0;
3802 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= 0;
3804 page_idx
= last_idx
;
3808 lck_mtx_unlock(&vm_page_queue_free_lock
);
3812 unsigned int cur_idx
;
3813 unsigned int tmp_start_idx
;
3814 vm_object_t locked_object
= VM_OBJECT_NULL
;
3815 boolean_t abort_run
= FALSE
;
3817 assert(page_idx
- start_idx
== contig_pages
);
3819 tmp_start_idx
= start_idx
;
3822 * first pass through to pull the free pages
3823 * off of the free queue so that in case we
3824 * need substitute pages, we won't grab any
3825 * of the free pages in the run... we'll clear
3826 * the 'free' bit in the 2nd pass, and even in
3827 * an abort_run case, we'll collect all of the
3828 * free pages in this run and return them to the free list
3830 while (start_idx
< page_idx
) {
3832 m1
= &vm_pages
[start_idx
++];
3834 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3841 color
= m1
->phys_page
& vm_color_mask
;
3843 vm_page_verify_free_list(&vm_page_queue_free
[color
], color
, m1
, TRUE
);
3845 queue_remove(&vm_page_queue_free
[color
],
3849 m1
->pageq
.next
= NULL
;
3850 m1
->pageq
.prev
= NULL
;
3852 vm_page_verify_free_list(&vm_page_queue_free
[color
], color
, VM_PAGE_NULL
, FALSE
);
3855 * Clear the "free" bit so that this page
3856 * does not get considered for another
3857 * concurrent physically-contiguous allocation.
3862 vm_page_free_count
--;
3866 * adjust global freelist counts
3868 if (vm_page_free_count
< vm_page_free_count_minimum
)
3869 vm_page_free_count_minimum
= vm_page_free_count
;
3871 if( flags
& KMA_LOMEM
)
3872 vm_page_lomem_find_contiguous_last_idx
= page_idx
;
3874 vm_page_find_contiguous_last_idx
= page_idx
;
3877 * we can drop the free queue lock at this point since
3878 * we've pulled any 'free' candidates off of the list
3879 * we need it dropped so that we can do a vm_page_grab
3880 * when substituing for pmapped/dirty pages
3882 lck_mtx_unlock(&vm_page_queue_free_lock
);
3884 start_idx
= tmp_start_idx
;
3885 cur_idx
= page_idx
- 1;
3887 while (start_idx
++ < page_idx
) {
3889 * must go through the list from back to front
3890 * so that the page list is created in the
3891 * correct order - low -> high phys addresses
3893 m1
= &vm_pages
[cur_idx
--];
3896 if (m1
->object
== VM_OBJECT_NULL
) {
3898 * page has already been removed from
3899 * the free list in the 1st pass
3901 assert(m1
->offset
== (vm_object_offset_t
) -1);
3903 assert(!m1
->wanted
);
3904 assert(!m1
->laundry
);
3908 if (abort_run
== TRUE
)
3911 object
= m1
->object
;
3913 if (object
!= locked_object
) {
3914 if (locked_object
) {
3915 vm_object_unlock(locked_object
);
3916 locked_object
= VM_OBJECT_NULL
;
3918 if (vm_object_lock_try(object
))
3919 locked_object
= object
;
3921 if (locked_object
== VM_OBJECT_NULL
||
3922 (VM_PAGE_WIRED(m1
) || m1
->gobbled
||
3923 m1
->encrypted
|| m1
->encrypted_cleaning
|| m1
->cs_validated
|| m1
->cs_tainted
||
3924 m1
->error
|| m1
->absent
|| m1
->pageout_queue
|| m1
->laundry
|| m1
->wanted
|| m1
->precious
||
3925 m1
->cleaning
|| m1
->overwriting
|| m1
->restart
|| m1
->unusual
|| m1
->list_req_pending
|| m1
->busy
)) {
3927 if (locked_object
) {
3928 vm_object_unlock(locked_object
);
3929 locked_object
= VM_OBJECT_NULL
;
3931 tmp_start_idx
= cur_idx
;
3935 if (m1
->pmapped
|| m1
->dirty
) {
3937 vm_object_offset_t offset
;
3939 m2
= vm_page_grab();
3941 if (m2
== VM_PAGE_NULL
) {
3942 if (locked_object
) {
3943 vm_object_unlock(locked_object
);
3944 locked_object
= VM_OBJECT_NULL
;
3946 tmp_start_idx
= cur_idx
;
3951 refmod
= pmap_disconnect(m1
->phys_page
);
3954 vm_page_copy(m1
, m2
);
3956 m2
->reference
= m1
->reference
;
3957 m2
->dirty
= m1
->dirty
;
3959 if (refmod
& VM_MEM_REFERENCED
)
3960 m2
->reference
= TRUE
;
3961 if (refmod
& VM_MEM_MODIFIED
)
3963 offset
= m1
->offset
;
3966 * completely cleans up the state
3967 * of the page so that it is ready
3968 * to be put onto the free list, or
3969 * for this purpose it looks like it
3970 * just came off of the free list
3972 vm_page_free_prepare(m1
);
3975 * make sure we clear the ref/mod state
3976 * from the pmap layer... else we risk
3977 * inheriting state from the last time
3978 * this page was used...
3980 pmap_clear_refmod(m2
->phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
3982 * now put the substitute page on the object
3984 vm_page_insert_internal(m2
, locked_object
, offset
, TRUE
, TRUE
);
3987 vm_page_activate(m2
);
3989 vm_page_deactivate(m2
);
3991 PAGE_WAKEUP_DONE(m2
);
3995 * completely cleans up the state
3996 * of the page so that it is ready
3997 * to be put onto the free list, or
3998 * for this purpose it looks like it
3999 * just came off of the free list
4001 vm_page_free_prepare(m1
);
4007 m1
->pageq
.next
= (queue_entry_t
) m
;
4008 m1
->pageq
.prev
= NULL
;
4011 if (locked_object
) {
4012 vm_object_unlock(locked_object
);
4013 locked_object
= VM_OBJECT_NULL
;
4016 if (abort_run
== TRUE
) {
4017 if (m
!= VM_PAGE_NULL
) {
4018 vm_page_free_list(m
, FALSE
);
4024 * want the index of the last
4025 * page in this run that was
4026 * successfully 'stolen', so back
4027 * it up 1 for the auto-decrement on use
4028 * and 1 more to bump back over this page
4030 page_idx
= tmp_start_idx
+ 2;
4031 if (page_idx
>= vm_pages_count
) {
4034 page_idx
= last_idx
= 0;
4040 * We didn't find a contiguous range but we didn't
4041 * start from the very first page.
4042 * Start again from the very first page.
4044 RESET_STATE_OF_RUN();
4046 if( flags
& KMA_LOMEM
)
4047 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
4049 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
4051 last_idx
= page_idx
;
4053 lck_mtx_lock(&vm_page_queue_free_lock
);
4055 * reset our free page limit since we
4056 * dropped the lock protecting the vm_page_free_queue
4058 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4062 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
4070 vm_page_gobble_count
+= npages
;
4073 * gobbled pages are also counted as wired pages
4075 vm_page_wire_count
+= npages
;
4077 assert(vm_page_verify_contiguous(m
, npages
));
4080 vm_page_unlock_queues();
4083 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
4085 tv_end_sec
-= tv_start_sec
;
4086 if (tv_end_usec
< tv_start_usec
) {
4088 tv_end_usec
+= 1000000;
4090 tv_end_usec
-= tv_start_usec
;
4091 if (tv_end_usec
>= 1000000) {
4093 tv_end_sec
-= 1000000;
4095 if (vm_page_find_contig_debug
) {
4096 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4097 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
4098 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
4099 scanned
, yielded
, dumped_run
, stolen_pages
);
4104 vm_page_verify_free_lists();
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	     ((vm_page_free_count < vm_page_free_target) &&
	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
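/*
 * Illustrative sketch (not part of the build): a hypothetical caller
 * asking for 16 wired, physically contiguous pages with no physical
 * address constraint, then walking the returned list:
 *
 *	vm_page_t	pages, m;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(16 * PAGE_SIZE, &pages, 0, 0, TRUE, 0);
 *	if (kr == KERN_SUCCESS) {
 *		for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
 *			 ... use m->phys_page ...
 *	}
 *
 * The argument values are made up for illustration; the parameter order
 * follows the definition above (size, list, max_pnum, pnum_mask, wire,
 * flags).
 */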
4163 unsigned int vm_max_delayed_work_limit
= DEFAULT_DELAYED_WORK_LIMIT
;
4166 * when working on a 'run' of pages, it is necessary to hold
4167 * the vm_page_queue_lock (a hot global lock) for certain operations
4168 * on the page... however, the majority of the work can be done
4169 * while merely holding the object lock... in fact there are certain
4170 * collections of pages that don't require any work brokered by the
4171 * vm_page_queue_lock... to mitigate the time spent behind the global
4172 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4173 * while doing all of the work that doesn't require the vm_page_queue_lock...
4174 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4175 * necessary work for each page... we will grab the busy bit on the page
4176 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4177 * if it can't immediately take the vm_page_queue_lock in order to compete
4178 * for the locks in the same order that vm_pageout_scan takes them.
4179 * the operation names are modeled after the names of the routines that
4180 * need to be called in order to make the changes very obvious in the
4185 vm_page_do_delayed_work(
4187 struct vm_page_delayed_work
*dwp
,
4192 vm_page_t local_free_q
= VM_PAGE_NULL
;
4193 boolean_t dropped_obj_lock
= FALSE
;
4196 * pageout_scan takes the vm_page_lock_queues first
4197 * then tries for the object lock... to avoid what
4198 * is effectively a lock inversion, we'll go to the
4199 * trouble of taking them in that same order... otherwise
4200 * if this object contains the majority of the pages resident
4201 * in the UBC (or a small set of large objects actively being
4202 * worked on contain the majority of the pages), we could
4203 * cause the pageout_scan thread to 'starve' in its attempt
4204 * to find pages to move to the free queue, since it has to
4205 * successfully acquire the object lock of any candidate page
4206 * before it can steal/clean it.
4208 if (!vm_page_trylockspin_queues()) {
4209 vm_object_unlock(object
);
4211 vm_page_lockspin_queues();
4213 for (j
= 0; ; j
++) {
4214 if (!vm_object_lock_avoid(object
) &&
4215 _vm_object_lock_try(object
))
4217 vm_page_unlock_queues();
4219 vm_page_lockspin_queues();
4221 dropped_obj_lock
= TRUE
;
4223 for (j
= 0; j
< dw_count
; j
++, dwp
++) {
4227 if (dwp
->dw_mask
& DW_set_list_req_pending
) {
4228 m
->list_req_pending
= TRUE
;
4230 if (dropped_obj_lock
== TRUE
) {
4232 * need to make sure anyone that might have
4233 * blocked on busy == TRUE when we dropped
4234 * the object lock gets a chance to re-evaluate
4235 * its state since we have several places
4236 * where we avoid potential deadlocks with
4237 * the fileysystem by stealing pages with
4238 * list_req_pending == TRUE and busy == TRUE
4240 dwp
->dw_mask
|= DW_PAGE_WAKEUP
;
4243 if (dwp
->dw_mask
& DW_vm_pageout_throttle_up
)
4244 vm_pageout_throttle_up(m
);
4246 if (dwp
->dw_mask
& DW_vm_page_wire
)
4248 else if (dwp
->dw_mask
& DW_vm_page_unwire
) {
4251 queueit
= (dwp
->dw_mask
& DW_vm_page_free
) ? FALSE
: TRUE
;
4253 vm_page_unwire(m
, queueit
);
4255 if (dwp
->dw_mask
& DW_vm_page_free
) {
4256 vm_page_free_prepare_queues(m
);
4258 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
4260 * Add this page to our list of reclaimed pages,
4261 * to be freed later.
4263 m
->pageq
.next
= (queue_entry_t
) local_free_q
;
4266 if (dwp
->dw_mask
& DW_vm_page_deactivate_internal
)
4267 vm_page_deactivate_internal(m
, FALSE
);
4268 else if (dwp
->dw_mask
& DW_vm_page_activate
) {
4269 if (m
->active
== FALSE
) {
4270 vm_page_activate(m
);
4273 else if (dwp
->dw_mask
& DW_vm_page_speculate
)
4274 vm_page_speculate(m
, TRUE
);
4275 else if (dwp
->dw_mask
& DW_vm_page_lru
)
4277 else if (dwp
->dw_mask
& DW_VM_PAGE_QUEUES_REMOVE
)
4278 VM_PAGE_QUEUES_REMOVE(m
);
4280 if (dwp
->dw_mask
& DW_set_reference
)
4281 m
->reference
= TRUE
;
4282 else if (dwp
->dw_mask
& DW_clear_reference
)
4283 m
->reference
= FALSE
;
4285 if (dwp
->dw_mask
& DW_move_page
) {
4286 VM_PAGE_QUEUES_REMOVE(m
);
4288 assert(!m
->laundry
);
4289 assert(m
->object
!= kernel_object
);
4290 assert(m
->pageq
.next
== NULL
&&
4291 m
->pageq
.prev
== NULL
);
4293 VM_PAGE_ENQUEUE_INACTIVE(m
, FALSE
);
4295 if (dwp
->dw_mask
& DW_clear_busy
)
4298 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
4302 vm_page_unlock_queues();
4305 vm_page_free_list(local_free_q
, TRUE
);
4307 VM_CHECK_MEMORYSTATUS
;
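/*
 * Illustrative sketch (not part of the build): the intended use of the
 * delayed-work machinery is to batch per-page operations while holding
 * only the object lock and flush them in one call.  The struct member
 * and limit names below (dw_m, dw_mask, DEFAULT_DELAYED_WORK_LIMIT)
 * follow their use in the VM code but should be treated as an
 * approximation, not a reference:
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *
 *	   (object locked; for each page m of interest)
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
 *	dwp++;
 *	dw_count++;
 *	if (dw_count >= vm_max_delayed_work_limit) {
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 *
 *	   (after the loop)
 *	if (dw_count)
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 */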
void vm_check_memorystatus()
{
	static boolean_t in_critical = FALSE;
	static unsigned int last_memorystatus = 0;
	unsigned int pages_avail;

	if (!kern_memorystatus_delta) {
		return;
	}

	pages_avail = (vm_page_active_count +
		       vm_page_inactive_count +
		       vm_page_speculative_count +
		       vm_page_free_count +
		       (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));

	if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
	     (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
	     (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
		kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
		last_memorystatus = pages_avail;

		thread_wakeup((event_t)&kern_memorystatus_wakeup);

		in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
	}
}
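/*
 * Worked example of the level computation above (numbers are
 * illustrative): with max_mem = 1GB, atop_64(max_mem) is 262,144
 * 4KB pages.  If pages_avail works out to 65,536 then
 *
 *	kern_memorystatus_level = 65,536 * 100 / 262,144 = 25
 *
 * i.e. the level is the percentage of physical memory still considered
 * available (free + active + inactive + speculative, plus purgeable
 * when the dynamic pager is not running).
 */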
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}

void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
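/*
 * Illustrative sketch (not part of the build): these accessors let other
 * subsystems (IOKit, for example) walk a page list built by
 * vm_page_alloc_list() without knowing the vm_page layout:
 *
 *	vm_page_t	p;
 *
 *	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
 *		ppnum_t pnum = vm_page_get_phys_page(p);
 *		 ... hand pnum to the device/DMA code ...
 *	}
 */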
4401 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4405 static vm_page_t hibernate_gobble_queue
;
4407 extern boolean_t (* volatile consider_buffer_cache_collect
)(int);
4409 static int hibernate_drain_pageout_queue(struct vm_pageout_queue
*);
4410 static int hibernate_flush_dirty_pages(void);
4411 static int hibernate_flush_queue(queue_head_t
*, int);
4412 static void hibernate_dirty_page(vm_page_t
);
4414 void hibernate_flush_wait(void);
4415 void hibernate_mark_in_progress(void);
4416 void hibernate_clear_in_progress(void);
4419 struct hibernate_statistics
{
4420 int hibernate_considered
;
4421 int hibernate_reentered_on_q
;
4422 int hibernate_found_dirty
;
4423 int hibernate_skipped_cleaning
;
4424 int hibernate_skipped_transient
;
4425 int hibernate_skipped_precious
;
4426 int hibernate_queue_nolock
;
4427 int hibernate_queue_paused
;
4428 int hibernate_throttled
;
4429 int hibernate_throttle_timeout
;
4430 int hibernate_drained
;
4431 int hibernate_drain_timeout
;
4433 int cd_found_precious
;
4436 int cd_found_unusual
;
4437 int cd_found_cleaning
;
4438 int cd_found_laundry
;
4442 int cd_vm_page_wire_count
;
4451 hibernate_drain_pageout_queue(struct vm_pageout_queue
*q
)
4453 wait_result_t wait_result
;
4455 vm_page_lock_queues();
4457 while (q
->pgo_laundry
) {
4459 q
->pgo_draining
= TRUE
;
4461 assert_wait_timeout((event_t
) (&q
->pgo_laundry
+1), THREAD_INTERRUPTIBLE
, 5000, 1000*NSEC_PER_USEC
);
4463 vm_page_unlock_queues();
4465 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
4467 if (wait_result
== THREAD_TIMED_OUT
) {
4468 hibernate_stats
.hibernate_drain_timeout
++;
4471 vm_page_lock_queues();
4473 hibernate_stats
.hibernate_drained
++;
4475 vm_page_unlock_queues();
4481 hibernate_dirty_page(vm_page_t m
)
4483 vm_object_t object
= m
->object
;
4484 struct vm_pageout_queue
*q
;
4487 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4489 vm_object_lock_assert_exclusive(object
);
4492 * protect the object from collapse -
4493 * locking in the object's paging_offset.
4495 vm_object_paging_begin(object
);
4497 m
->list_req_pending
= TRUE
;
4501 if (object
->internal
== TRUE
)
4502 q
= &vm_pageout_queue_internal
;
4504 q
= &vm_pageout_queue_external
;
4507 * pgo_laundry count is tied to the laundry bit
4512 m
->pageout_queue
= TRUE
;
4513 queue_enter(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
4515 if (q
->pgo_idle
== TRUE
) {
4516 q
->pgo_idle
= FALSE
;
4517 thread_wakeup((event_t
) &q
->pgo_pending
);
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if ( !m_object->pager_initialized && m_object->pager_created)
			goto reenter_pg_on_q;

		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED))
				m->dirty = TRUE;
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			tq->pgo_throttled = TRUE;

			while (retval == 0) {

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (hibernate_should_abort())
					retval = 1;

				if (wait_result != THREAD_TIMED_OUT)
					break;

				if (--wait_count == 0) {
					hibernate_stats.hibernate_throttle_timeout++;
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		VM_PAGE_QUEUES_REMOVE(m);

		hibernate_dirty_page(m);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
static int
hibernate_flush_dirty_pages()
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
		return (1);

	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);
	return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
}
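/*
 * Flush ordering above: speculative ages first, then the active queue, the
 * inactive queue (less its zero-fill portion) and the zero-fill queue, and
 * finally both pageout queues are drained so every page handed off by
 * hibernate_dirty_page() has actually reached its pager.  A non-zero return
 * means the flush was aborted or timed out and hibernation should not proceed.
 */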
extern void IOSleep(unsigned int);
extern int sync_internal(void);

int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	IOSleep(2 * 1000);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);

	if ((retval = hibernate_flush_dirty_pages()) == 0) {
		if (consider_buffer_cache_collect != NULL) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);

			sync_internal();

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t		bank;
	hibernate_bitmap_t *	bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t	last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
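/*
 * Worked example of the out-of-bound masking above (assumed bank geometry):
 * a bank covering 36 pages has (last_page - first_page + 1) & 31 == 4 valid
 * bits in its final 32-bit word.  Since hibernate_page_bitset() fills each
 * word from the high end (0x80000000 >> (page & 31)), the last word is preset
 * to 0xFFFFFFFF >> 4 == 0x0FFFFFFF, marking the 28 non-existent trailing
 * pages as "does not need saving" while the 4 real pages stay zero (save).
 */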
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t	i;
	vm_page_t	m;
	uint64_t	start, end, timeout, nsec;

	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}
void
hibernate_free_gobble_pages(void)
{
	vm_page_t	m, next;
	uint32_t	count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
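/*
 * Illustrative pairing (hypothetical arguments): a caller can pre-reserve
 * pages before the image is built and release them on wake or failure, e.g.
 *
 *	hibernate_gobble_pages(free_page_target, 20);	// assumed policy values
 *	...
 *	hibernate_free_gobble_pages();
 *
 * free_page_target and the 20 ms grab budget are only placeholders for
 * whatever policy the hibernation driver actually applies.
 */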
static boolean_t
hibernate_consider_discard(vm_page_t m)
{
	vm_object_t	object = NULL;
	int		refmod_state;
	boolean_t	discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry || m->list_req_pending) {
			hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED)
				m->dirty = TRUE;
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);

		if (discard == FALSE)
			hibernate_stats.cd_found_dirty++;
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		assert(old_queue);
		/* No need to lock page queue for token delete, hibernate_vm_unlock()
		   makes sure these locks are uncontended before sleep */
		vm_purgeable_token_delete_first(old_queue);
		m->object->purgable = VM_PURGABLE_EMPTY;
	}

	vm_page_free(m);
}
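/*
 * hibernate_consider_discard() and hibernate_discard_page() work as a pair:
 * the former is consulted while hibernate_page_list_setall() builds the
 * bitmaps and only marks a page discardable (bit set, i.e. not saved) if it
 * is clean or backed by a volatile/empty purgeable object; the latter runs
 * later, from hibernate_page_list_discard(), and actually frees the pages
 * whose bits were set so they never have to be restored on wake.
 */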
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   uint32_t * pagesOut)
{
	uint64_t start, end, nsec;
	vm_page_t m;
	uint32_t pages = page_list->page_count;
	uint32_t count_zf = 0, count_throttled = 0;
	uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
	uint32_t count_wire = pages;
	uint32_t count_discard_active    = 0;
	uint32_t count_discard_inactive  = 0;
	uint32_t count_discard_purgeable = 0;
	uint32_t count_discard_speculative = 0;
	uint32_t i;
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;


	HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	hibernate_page_list_zero(page_list);
	hibernate_page_list_zero(page_list_wired);
	hibernate_page_list_zero(page_list_pal);

	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
	hibernate_stats.cd_pages = pages;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, TRUE);
	}

	/* gobbled pages are not written to the image */
	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		m = (vm_page_t) m->pageq.next;
	}

	for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list,       TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			hibernate_page_bitset(page_list,       TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		hibernate_stats.cd_total_free++;
	}

	queue_iterate( &vm_page_queue_throttled,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
		}
		else
			count_throttled++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_zf,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_zf++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_inactive,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_inactive++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		queue_iterate(&vm_page_queue_speculative[i].age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m))
			{
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
			}
			else
				count_speculative++;
			count_wire--;
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
	}

	queue_iterate( &vm_page_queue_active,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
		}
		else
			count_active++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	// pull wired from hibernate_bitmap

	bitmap = &page_list->bank_bitmap[0];
	bitmap_wired = &page_list_wired->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
		for (i = 0; i < bitmap->bitmapwords; i++)
			bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
		bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
		bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);

	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
	       pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
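/*
 * The wired merge above works because a zero bit means "save".  Every page
 * found on a pageable queue had its bit set in page_list_wired, so
 * ~bitmap_wired is 1 exactly for the pages that are wired or otherwise
 * unknown to VM; OR-ing that into page_list removes them from the pageable
 * image and leaves them to page_list_wired.  After the merge, zero bits in
 * page_list are the pageable pages to write and zero bits in page_list_wired
 * are the wired pages to write, matching the comment at the top of this
 * routine.
 */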
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t	start, end, nsec;
	vm_page_t	m;
	vm_page_t	next;
	uint32_t	i;
	uint32_t	count_discard_active    = 0;
	uint32_t	count_discard_inactive  = 0;
	uint32_t	count_discard_purgeable = 0;
	uint32_t	count_discard_speculative = 0;

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_zf);
	while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
}

#endif /* HIBERNATION */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t	*info,
	unsigned int		count)
{
	unsigned int	i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
#include <mach_kdb.h>
#if	MACH_KDB

#include <ddb/db_output.h>
#include <vm/vm_print.h>
#define	printf	kdbprintf

/*
 *	Routine:	vm_page_print [exported]
 */
void
vm_page_print(
	db_addr_t	db_addr)
{
	vm_page_t	p;

	p = (vm_page_t) (long) db_addr;

	iprintf("page 0x%x\n", p);

	db_indent += 2;

	iprintf("object=0x%x", p->object);
	printf(", offset=0x%x", p->offset);
	printf(", wire_count=%d", p->wire_count);

	iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
		(p->local ? "" : "!"),
		(p->inactive ? "" : "!"),
		(p->active ? "" : "!"),
		(p->throttled ? "" : "!"),
		(p->gobbled ? "" : "!"),
		(p->laundry ? "" : "!"),
		(p->free ? "" : "!"),
		(p->reference ? "" : "!"),
		(p->encrypted ? "" : "!"));
	iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
		(p->busy ? "" : "!"),
		(p->wanted ? "" : "!"),
		(p->tabled ? "" : "!"),
		(p->fictitious ? "" : "!"),
		(p->private ? "" : "!"),
		(p->precious ? "" : "!"));
	iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
		(p->absent ? "" : "!"),
		(p->error ? "" : "!"),
		(p->dirty ? "" : "!"),
		(p->cleaning ? "" : "!"),
		(p->pageout ? "" : "!"),
		(p->clustered ? "" : "!"));
	iprintf("%soverwriting, %srestart, %sunusual\n",
		(p->overwriting ? "" : "!"),
		(p->restart ? "" : "!"),
		(p->unusual ? "" : "!"));

	iprintf("phys_page=0x%x", p->phys_page);
	printf("\n");

	db_indent -= 2;
}
#endif	/* MACH_KDB */