/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Author:  Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>

#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>             /* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>

#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kern_memorystatus.h>

#include <sys/kdebug.h>
boolean_t   vm_page_free_verify = TRUE;

uint32_t    vm_lopage_free_count = 0;
uint32_t    vm_lopage_free_limit = 0;
uint32_t    vm_lopage_lowater    = 0;
boolean_t   vm_lopage_refill = FALSE;
boolean_t   vm_lopage_needed = FALSE;

lck_mtx_ext_t   vm_page_queue_lock_ext;
lck_mtx_ext_t   vm_page_queue_free_lock_ext;
lck_mtx_ext_t   vm_purgeable_queue_lock_ext;

int         speculative_age_index = 0;
int         speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

__private_extern__ void     vm_page_init_lck_grp(void);

static void         vm_page_free_prepare(vm_page_t page);
static vm_page_t    vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *  Associated with page of user-allocatable memory is a
 *  page structure.
 */

/*
 *  These variables record the values returned by vm_page_bootstrap,
 *  for debugging purposes.  The implementation of pmap_steal_memory
 *  and pmap_startup here also uses them internally.
 */

vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;

/*
 *  The vm_page_lookup() routine, which provides for fast
 *  (virtual memory object, offset) to page lookup, employs
 *  the following hash table.  The vm_page_{insert,remove}
 *  routines install and remove associations in the table.
 *  [This table is often called the virtual-to-physical,
 *  or VP, table.]
 */
typedef struct {
    vm_page_t   pages;
#if MACH_PAGE_HASH_STATS
    int         cur_count;         /* current count */
    int         hi_count;          /* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

#define BUCKETS_PER_LOCK    16

vm_page_bucket_t *vm_page_buckets;              /* Array of buckets */
unsigned int    vm_page_bucket_count = 0;       /* How big is array? */
unsigned int    vm_page_hash_mask;              /* Mask for hash function */
unsigned int    vm_page_hash_shift;             /* Shift for hash function */
uint32_t        vm_page_bucket_hash;            /* Basic bucket hash */
unsigned int    vm_page_bucket_lock_count = 0;  /* How big is array of locks? */

lck_spin_t      *vm_page_bucket_locks;
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
    int     i;
    int     numbuckets = 0;
    int     highsum = 0;
    int     maxdepth = 0;

    for (i = 0; i < vm_page_bucket_count; i++) {
        if (vm_page_buckets[i].hi_count) {
            numbuckets++;
            highsum += vm_page_buckets[i].hi_count;
            if (vm_page_buckets[i].hi_count > maxdepth)
                maxdepth = vm_page_buckets[i].hi_count;
        }
    }
    printf("Total number of buckets: %d\n", vm_page_bucket_count);
    printf("Number used buckets:     %d = %d%%\n",
        numbuckets, 100*numbuckets/vm_page_bucket_count);
    printf("Number unused buckets:   %d = %d%%\n",
        vm_page_bucket_count - numbuckets,
        100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
    printf("Sum of bucket max depth: %d\n", highsum);
    printf("Average bucket depth:    %d.%2d\n",
        highsum/vm_page_bucket_count,
        highsum%vm_page_bucket_count);
    printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *  The virtual page size is currently implemented as a runtime
 *  variable, but is constant once initialized using vm_set_page_size.
 *  This initialization must be done in the machine-dependent
 *  bootstrap sequence, before calling other machine-independent
 *  functions.
 *
 *  All references to the virtual page size outside this
 *  module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *  constants.
 */
vm_size_t   page_size  = PAGE_SIZE;
vm_size_t   page_mask  = PAGE_MASK;
int         page_shift = PAGE_SHIFT;

/*
 *  Resident page structures are initialized from
 *  a template (see vm_page_alloc).
 *
 *  When adding a new field to the virtual memory
 *  object structure, be sure to add initialization
 *  (see vm_page_bootstrap).
 */
struct vm_page  vm_page_template;

vm_page_t       vm_pages = VM_PAGE_NULL;
unsigned int    vm_pages_count = 0;
ppnum_t         vm_page_lowest = 0;

/*
 *  Resident pages that represent real memory
 *  are allocated from a set of free lists,
 *  one per color.
 */
unsigned int    vm_colors;
unsigned int    vm_color_mask;              /* mask is == (vm_colors-1) */
unsigned int    vm_cache_geometry_colors = 0;   /* set by hw dependent code during startup */
queue_head_t    vm_page_queue_free[MAX_COLORS];
unsigned int    vm_page_free_wanted;
unsigned int    vm_page_free_wanted_privileged;
unsigned int    vm_page_free_count;
unsigned int    vm_page_fictitious_count;

unsigned int    vm_page_free_count_minimum;     /* debugging */
/*
 *  Occasionally, the virtual memory system uses
 *  resident page structures that do not refer to
 *  real pages, for example to leave a page with
 *  important state information in the VP table.
 *
 *  These page structures are allocated the way
 *  most other kernel structures are.
 */
zone_t          vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
unsigned int io_throttle_zero_fill;

unsigned int    vm_page_local_q_count = 0;
unsigned int    vm_page_local_q_soft_limit = 250;
unsigned int    vm_page_local_q_hard_limit = 500;
struct vplq     *vm_page_local_q = NULL;

/*
 *  Fictitious pages don't have a physical address,
 *  but we must initialize phys_page to something.
 *  For debugging, this should be a strange value
 *  that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *  Guard pages are not accessible so they don't
 *  need a physical address, but we need to enter
 *  one in the pmap.
 *  Let's make it recognizable and make sure that
 *  we don't use a real physical page with that
 *  physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *  Resident page structures are also chained on
 *  queues that are used by the page replacement
 *  system (pageout daemon).  These queues are
 *  defined here, but are shared by the pageout
 *  module.  The inactive queue is broken into
 *  inactive and zf for convenience as the
 *  pageout daemon often assigns a higher
 *  affinity to zf pages
 */
queue_head_t    vm_page_queue_active;
queue_head_t    vm_page_queue_inactive;
queue_head_t    vm_page_queue_zf;       /* inactive memory queue for zero fill */
queue_head_t    vm_page_queue_throttled;

unsigned int    vm_page_active_count;
unsigned int    vm_page_inactive_count;
unsigned int    vm_page_throttled_count;
unsigned int    vm_page_speculative_count;
unsigned int    vm_page_wire_count;
unsigned int    vm_page_wire_count_initial;
unsigned int    vm_page_gobble_count = 0;
unsigned int    vm_page_wire_count_warning = 0;
unsigned int    vm_page_gobble_count_warning = 0;

unsigned int    vm_page_purgeable_count = 0;        /* # of pages purgeable now */
unsigned int    vm_page_purgeable_wired_count = 0;  /* # of purgeable pages that are wired now */
uint64_t        vm_page_purged_count = 0;           /* total count of purged pages */

#if DEVELOPMENT || DEBUG
unsigned int    vm_page_speculative_recreated = 0;
unsigned int    vm_page_speculative_created = 0;
unsigned int    vm_page_speculative_used = 0;
#endif

uint64_t        max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t         max_valid_low_ppnum = 0xffffffff;

/*
 *  Several page replacement parameters are also
 *  shared with this module, so that page allocation
 *  (done here in vm_page_alloc) can trigger the
 *  pageout daemon.
 */
unsigned int    vm_page_free_target = 0;
unsigned int    vm_page_free_min = 0;
unsigned int    vm_page_throttle_limit = 0;
uint32_t        vm_page_creation_throttle = 0;
unsigned int    vm_page_inactive_target = 0;
unsigned int    vm_page_inactive_min = 0;
unsigned int    vm_page_free_reserved = 0;
unsigned int    vm_page_throttle_count = 0;

/*
 *  The VM system has a couple of heuristics for deciding
 *  that pages are "uninteresting" and should be placed
 *  on the inactive queue as likely candidates for replacement.
 *  These variables let the heuristics be controlled at run-time
 *  to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *  vm_set_page_size:
 *
 *  Sets the page size, perhaps based upon the memory
 *  size.  Must be called before any use of page-size
 *  dependent functions.
 *
 *  Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
    page_mask = page_size - 1;

    if ((page_mask & page_size) != 0)
        panic("vm_set_page_size: page size not a power of two");

    for (page_shift = 0; ; page_shift++)
        if ((1U << page_shift) == page_size)
            break;
}
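
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * the power-of-two check and shift derivation used in vm_set_page_size()
 * above, shown on its own.  "example_page_shift" is a hypothetical name
 * used only for this illustration.
 */
#if 0
static int
example_page_shift(vm_size_t sz)
{
    int shift;

    /* sz must be a power of two: exactly one bit set */
    if (sz == 0 || (sz & (sz - 1)) != 0)
        return -1;
    /* find the set bit: e.g. 4096 -> 12 */
    for (shift = 0; (1U << shift) != sz; shift++)
        continue;
    return shift;
}
#endif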
/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
    unsigned int    n, override;

    if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )  /* colors specified as a boot-arg? */
        n = override;
    else if ( vm_cache_geometry_colors )        /* do we know what the cache geometry is? */
        n = vm_cache_geometry_colors;
    else    n = DEFAULT_COLORS;                 /* use default if all else fails */

    if ( n > MAX_COLORS )
        n = MAX_COLORS;

    /* the count must be a power of 2  */
    if ( ( n & (n - 1)) != 0  )
        panic("vm_page_set_colors");

    vm_colors = n;
    vm_color_mask = n - 1;
}
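
/*
 * Illustrative sketch (not compiled): because the color count is forced
 * to a power of two above, a page's free-queue color can be computed with
 * a mask instead of a modulo, exactly as the allocation paths below do
 * with vm_color_mask.  "example_color_of" is a hypothetical helper added
 * only for illustration.
 */
#if 0
static unsigned int
example_color_of(ppnum_t phys_page, unsigned int color_mask)
{
    /* equivalent to (phys_page % vm_colors) when vm_colors is a power of two */
    return (phys_page & color_mask);
}
#endif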
lck_grp_t       vm_page_lck_grp_free;
lck_grp_t       vm_page_lck_grp_queue;
lck_grp_t       vm_page_lck_grp_local;
lck_grp_t       vm_page_lck_grp_purge;
lck_grp_t       vm_page_lck_grp_alloc;
lck_grp_t       vm_page_lck_grp_bucket;
lck_grp_attr_t  vm_page_lck_grp_attr;
lck_attr_t      vm_page_lck_attr;

__private_extern__ void
vm_page_init_lck_grp(void)
{
    /*
     * initialize the vm_page lock world
     */
    lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
    lck_attr_setdefault(&vm_page_lck_attr);
}
void
vm_page_init_local_q()
{
    unsigned int    num_cpus;
    unsigned int    i;
    struct vplq     *t_local_q;

    num_cpus = ml_get_max_cpus();

    /*
     * no point in this for a uni-processor system
     */
    if (num_cpus >= 2) {
        t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

        for (i = 0; i < num_cpus; i++) {
            struct vpl  *lq;

            lq = &t_local_q[i].vpl_un.vpl;
            VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
            queue_init(&lq->vpl_queue);
        }
        vm_page_local_q_count = num_cpus;

        vm_page_local_q = (struct vplq *)t_local_q;
    }
}
/*
 *  vm_page_bootstrap:
 *
 *  Initializes the resident memory module.
 *
 *  Allocates memory for the page cells, and
 *  for the object/offset-to-page hash table headers.
 *  Each page cell is initialized and placed on the free list.
 *  Returns the range of available kernel virtual memory.
 */
void
vm_page_bootstrap(
    vm_offset_t     *startp,
    vm_offset_t     *endp)
{
    register vm_page_t  m;
    unsigned int        i;
    unsigned int        log1;
    unsigned int        log2;
    unsigned int        size;

    /*
     *  Initialize the vm_page template.
     */

    m = &vm_page_template;
    bzero(m, sizeof (*m));

    m->pageq.next = NULL;
    m->pageq.prev = NULL;
    m->listq.next = NULL;
    m->listq.prev = NULL;
    m->next = VM_PAGE_NULL;

    m->object = VM_OBJECT_NULL;             /* reset later */
    m->offset = (vm_object_offset_t) -1;    /* reset later */

    m->pageout_queue = FALSE;
    m->speculative = FALSE;

    m->reference = FALSE;

    m->throttled = FALSE;
    m->__unused_pageq_bits = 0;

    m->phys_page = 0;       /* reset later */

    m->fictitious = FALSE;

    m->clustered = FALSE;
    m->overwriting = FALSE;

    m->encrypted = FALSE;
    m->encrypted_cleaning = FALSE;
    m->list_req_pending = FALSE;
    m->dump_cleaning = FALSE;
    m->cs_validated = FALSE;
    m->cs_tainted = FALSE;

    m->zero_fill = FALSE;

    m->__unused_object_bits = 0;

    /*
     *  Initialize the page queues.
     */
    vm_page_init_lck_grp();

    lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

    for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
        int group;

        purgeable_queues[i].token_q_head = 0;
        purgeable_queues[i].token_q_tail = 0;
        for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
            queue_init(&purgeable_queues[i].objq[group]);

        purgeable_queues[i].type = i;
        purgeable_queues[i].new_pages = 0;

        purgeable_queues[i].debug_count_tokens = 0;
        purgeable_queues[i].debug_count_objects = 0;
    }

    for (i = 0; i < MAX_COLORS; i++ )
        queue_init(&vm_page_queue_free[i]);

    queue_init(&vm_lopage_queue_free);
    queue_init(&vm_page_queue_active);
    queue_init(&vm_page_queue_inactive);
    queue_init(&vm_page_queue_throttled);
    queue_init(&vm_page_queue_zf);

    for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
        queue_init(&vm_page_queue_speculative[i].age_q);

        vm_page_queue_speculative[i].age_ts.tv_sec = 0;
        vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
    }
    vm_page_free_wanted = 0;
    vm_page_free_wanted_privileged = 0;

    vm_page_set_colors();

    /*
     *  Steal memory for the map and zone subsystems.
     */
    vm_map_steal_memory();

    /*
     *  Allocate (and initialize) the virtual-to-physical
     *  table hash buckets.
     *
     *  The number of buckets should be a power of two to
     *  get a good hash function.  The following computation
     *  chooses the first power of two that is greater
     *  than the number of physical pages in the system.
     */

    if (vm_page_bucket_count == 0) {
        unsigned int npages = pmap_free_pages();

        vm_page_bucket_count = 1;
        while (vm_page_bucket_count < npages)
            vm_page_bucket_count <<= 1;
    }
    vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

    vm_page_hash_mask = vm_page_bucket_count - 1;

    /*
     *  Calculate object shift value for hashing algorithm:
     *      O = log2(sizeof(struct vm_object))
     *      B = log2(vm_page_bucket_count)
     *      hash shifts the object left by
     *      B/2 - O
     */
    size = vm_page_bucket_count;
    for (log1 = 0; size > 1; log1++)
        size /= 2;
    size = sizeof(struct vm_object);
    for (log2 = 0; size > 1; log2++)
        size /= 2;
    vm_page_hash_shift = log1/2 - log2 + 1;

    vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);   /* Get (ceiling of sqrt of table size) */
    vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);  /* Get (ceiling of quadroot of table size) */
    vm_page_bucket_hash |= 1;                       /* Set bit and add 1 - always must be 1 to insure unique series */

    if (vm_page_hash_mask & vm_page_bucket_count)
        printf("vm_page_bootstrap: WARNING -- strange page hash\n");

    vm_page_buckets = (vm_page_bucket_t *)
        pmap_steal_memory(vm_page_bucket_count *
                          sizeof(vm_page_bucket_t));

    vm_page_bucket_locks = (lck_spin_t *)
        pmap_steal_memory(vm_page_bucket_lock_count *
                          sizeof(lck_spin_t));

    for (i = 0; i < vm_page_bucket_count; i++) {
        register vm_page_bucket_t *bucket = &vm_page_buckets[i];

        bucket->pages = VM_PAGE_NULL;
#if MACH_PAGE_HASH_STATS
        bucket->cur_count = 0;
        bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
    }

    for (i = 0; i < vm_page_bucket_lock_count; i++)
        lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

    /*
     *  Machine-dependent code allocates the resident page table.
     *  It uses vm_page_init to initialize the page frames.
     *  The code also returns to us the virtual space available
     *  to the kernel.  We don't trust the pmap module
     *  to get the alignment right.
     */

    pmap_startup(&virtual_space_start, &virtual_space_end);
    virtual_space_start = round_page(virtual_space_start);
    virtual_space_end = trunc_page(virtual_space_end);

    *startp = virtual_space_start;
    *endp = virtual_space_end;

    /*
     *  Compute the initial "wire" count.
     *  Up until now, the pages which have been set aside are not under
     *  the VM system's control, so although they aren't explicitly
     *  wired, they nonetheless can't be moved. At this moment,
     *  all VM managed pages are "free", courtesy of pmap_startup.
     */
    assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
    vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;    /* initial value */
    vm_page_wire_count_initial = vm_page_wire_count;
    vm_page_free_count_minimum = vm_page_free_count;

    printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
           vm_page_free_count, vm_page_wire_count);

    simple_lock_init(&vm_paging_lock, 0);
}
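
/*
 * Illustrative sketch (not compiled): the bucket sizing policy used in
 * vm_page_bootstrap() above -- pick the first power of two that is not
 * smaller than the number of physical pages, so a bucket can be chosen
 * with a mask.  "example_bucket_count" is a hypothetical helper shown
 * only to make the policy explicit.
 */
#if 0
static unsigned int
example_bucket_count(unsigned int npages)
{
    unsigned int count = 1;

    while (count < npages)
        count <<= 1;        /* first power of two >= npages */
    return count;           /* the hash mask is then (count - 1) */
}
#endif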
#ifndef MACHINE_PAGES
/*
 *  We implement pmap_steal_memory and pmap_startup with the help
 *  of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
    vm_size_t size)
{
    vm_offset_t addr, vaddr;
    ppnum_t     phys_page;

    /*
     *  We round the size to a round multiple.
     */

    size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

    /*
     *  If this is the first call to pmap_steal_memory,
     *  we have to initialize ourself.
     */

    if (virtual_space_start == virtual_space_end) {
        pmap_virtual_space(&virtual_space_start, &virtual_space_end);

        /*
         *  The initial values must be aligned properly, and
         *  we don't trust the pmap module to do it right.
         */

        virtual_space_start = round_page(virtual_space_start);
        virtual_space_end = trunc_page(virtual_space_end);
    }

    /*
     *  Allocate virtual memory for this request.
     */

    addr = virtual_space_start;
    virtual_space_start += size;

    //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */

    /*
     *  Allocate and map physical pages to back new virtual pages.
     */

    for (vaddr = round_page(addr);
         vaddr < addr + size;
         vaddr += PAGE_SIZE) {

        if (!pmap_next_page_hi(&phys_page))
            panic("pmap_steal_memory");

        /*
         *  XXX Logically, these mappings should be wired,
         *  but some pmap modules barf if they are.
         */
#if defined(__LP64__)
        pmap_pre_expand(kernel_pmap, vaddr);
#endif

        pmap_enter(kernel_pmap, vaddr, phys_page,
                   VM_PROT_READ|VM_PROT_WRITE,
                   VM_WIMG_USE_DEFAULT, FALSE);
        /*
         * Account for newly stolen memory
         */
        vm_page_wire_count++;
    }

    return (void *) addr;
}
void
pmap_startup(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    unsigned int    i, npages, pages_initialized, fill, fillval;
    ppnum_t         phys_page;
    addr64_t        tmpaddr;

    /*
     *  We calculate how many page frames we will have
     *  and then allocate the page structures in one chunk.
     */

    tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;    /* Get the amount of memory left */
    tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);    /* Account for any slop */
    npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));    /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

    vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

    /*
     *  Initialize the page frames.
     */
    for (i = 0, pages_initialized = 0; i < npages; i++) {
        if (!pmap_next_page(&phys_page))
            break;
        if (pages_initialized == 0 || phys_page < vm_page_lowest)
            vm_page_lowest = phys_page;

        vm_page_init(&vm_pages[i], phys_page, FALSE);
        pages_initialized++;
    }
    vm_pages_count = pages_initialized;

    /*
     * Check if we want to initialize pages to a known value
     */
    fill = 0;                                                               /* Assume no fill */
    if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;   /* Set fill */

    // -debug code remove
    if (2 == vm_himemory_mode) {
        // free low -> high so high is preferred
        for (i = 1; i <= pages_initialized; i++) {
            if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);  /* Fill the page with a known value if requested at boot */
            vm_page_release(&vm_pages[i - 1]);
        }
    }
    else
    // debug code remove-

    /*
     * Release pages in reverse order so that physical pages
     * initially get allocated in ascending addresses. This keeps
     * the devices (which must address physical memory) happy if
     * they require several consecutive pages.
     */
    for (i = pages_initialized; i > 0; i--) {
        if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);      /* Fill the page with a known value if requested at boot */
        vm_page_release(&vm_pages[i - 1]);
    }
    {
        vm_page_t   xx, xxo, xxl;
        int         j, k, l;

        j = 0;                                          /* (BRINGUP) */
        xxl = VM_PAGE_NULL;

        for( i = 0; i < vm_colors; i++ ) {
            queue_iterate(&vm_page_queue_free[i],
                          xx,
                          vm_page_t,
                          pageq) {                      /* BRINGUP */
                j++;                                    /* (BRINGUP) */
                if(j > vm_page_free_count) {            /* (BRINGUP) */
                    panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
                }

                l = vm_page_free_count - j;             /* (BRINGUP) */
                k = 0;                                  /* (BRINGUP) */

                if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

                for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {   /* (BRINGUP) */
                    k++;
                    if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
                    if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {             /* (BRINGUP) */
                        panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
                    }
                }

                xxl = xx;
            }
        }

        if(j != vm_page_free_count) {                   /* (BRINGUP) */
            panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
        }
    }
    /*
     *  We have to re-align virtual_space_start,
     *  because pmap_steal_memory has been using it.
     */

    virtual_space_start = round_page(virtual_space_start);

    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
#endif  /* MACHINE_PAGES */
/*
 *  Routine:    vm_page_module_init
 *  Purpose:
 *      Second initialization pass, to be done after
 *      the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
    vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
                         0, PAGE_SIZE, "vm pages");

#if ZONE_DEBUG
    zone_debug_disable(vm_page_zone);
#endif /* ZONE_DEBUG */

    zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
    zone_change(vm_page_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_zone, Z_FOREIGN, TRUE);

    /*
     * Adjust zone statistics to account for the real pages allocated
     * in vm_page_create(). [Q: is this really what we want?]
     */
    vm_page_zone->count += vm_page_pages;
    vm_page_zone->sum_count += vm_page_pages;
    vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

    lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
}
/*
 *  Routine:    vm_page_create
 *  Purpose:
 *      After the VM system is up, machine-dependent code
 *      may stumble across more physical memory.  For example,
 *      memory that it was reserving for a frame buffer.
 *      vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
    ppnum_t start,
    ppnum_t end)
{
    ppnum_t     phys_page;
    vm_page_t   m;

    for (phys_page = start;
         phys_page < end;
         phys_page++) {
        while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
            == VM_PAGE_NULL)
            vm_page_more_fictitious();

        m->fictitious = FALSE;
        pmap_clear_noencrypt(phys_page);

        vm_page_pages++;
        vm_page_release(m);
    }
}
/*
 * Distributes the object/offset key pair among hash buckets.
 * NOTE: The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
    ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
     & vm_page_hash_mask)
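
/*
 * Illustrative sketch (not compiled): the same mixing step as the
 * vm_page_hash() macro above, written out as a function.  Because the
 * bucket count is a power of two, "& mask" selects a bucket without a
 * divide.  The names below are hypothetical and exist only for this
 * illustration.
 */
#if 0
static unsigned int
example_page_hash(uintptr_t object, uint64_t offset,
                  uint32_t bucket_hash, unsigned int mask)
{
    natural_t h;

    h = (natural_t)(object * bucket_hash) +
        ((uint32_t)atop_64(offset) ^ bucket_hash);
    return (h & mask);
}
#endif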
/*
 *  vm_page_insert:     [ internal use only ]
 *
 *  Inserts the given mem entry into the object/object-page
 *  table and object list.
 *
 *  The object must be locked.
 */
void
vm_page_insert(
    vm_page_t           mem,
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
}
void
vm_page_insert_internal(
    vm_page_t           mem,
    vm_object_t         object,
    vm_object_offset_t  offset,
    boolean_t           queues_lock_held,
    boolean_t           insert_in_hash)
{
    vm_page_bucket_t    *bucket;
    lck_spin_t          *bucket_lock;
    int                 hash_id;

    XPR(XPR_VM_PAGE,
        "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
        object, offset, mem, 0,0);

    if (object == vm_submap_object) {
        /* the vm_submap_object is only a placeholder for submaps */
        panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
    }

    vm_object_lock_assert_exclusive(object);
    lck_mtx_assert(&vm_page_queue_lock,
                   queues_lock_held ? LCK_MTX_ASSERT_OWNED
                                    : LCK_MTX_ASSERT_NOTOWNED);

    if (insert_in_hash == TRUE) {
        if (mem->tabled || mem->object != VM_OBJECT_NULL)
            panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
                  "already in (obj=%p,off=0x%llx)",
                  mem, object, offset, mem->object, mem->offset);

        assert(!object->internal || offset < object->vo_size);

        /* only insert "pageout" pages into "pageout" objects,
         * and normal pages into normal objects */
        assert(object->pageout == mem->pageout);

        assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

        /*
         *  Record the object/offset pair in this page
         */

        mem->object = object;
        mem->offset = offset;

        /*
         *  Insert it into the object_object/offset hash table
         */
        hash_id = vm_page_hash(object, offset);
        bucket = &vm_page_buckets[hash_id];
        bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

        lck_spin_lock(bucket_lock);

        mem->next = bucket->pages;
        bucket->pages = mem;
#if MACH_PAGE_HASH_STATS
        if (++bucket->cur_count > bucket->hi_count)
            bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */

        lck_spin_unlock(bucket_lock);
    }

    {
        unsigned int    cache_attr;

        cache_attr = object->wimg_bits & VM_WIMG_MASK;

        if (cache_attr != VM_WIMG_USE_DEFAULT) {
            pmap_set_cache_attributes(mem->phys_page, cache_attr);
            object->set_cache_attr = TRUE;
        }
    }
    /*
     *  Now link into the object's list of backed pages.
     */

    VM_PAGE_INSERT(mem, object);
    mem->tabled = TRUE;

    /*
     *  Show that the object has one more resident page.
     */

    object->resident_page_count++;
    if (VM_PAGE_WIRED(mem)) {
        object->wired_page_count++;
    }
    assert(object->resident_page_count >= object->wired_page_count);

    assert(!mem->reusable);

    if (object->purgable == VM_PURGABLE_VOLATILE) {
        if (VM_PAGE_WIRED(mem)) {
            OSAddAtomic(1, &vm_page_purgeable_wired_count);
        } else {
            OSAddAtomic(1, &vm_page_purgeable_count);
        }
    } else if (object->purgable == VM_PURGABLE_EMPTY &&
               mem->throttled) {
        /*
         * This page belongs to a purged VM object but hasn't
         * been purged (because it was "busy").
         * It's in the "throttled" queue and hence not
         * visible to vm_pageout_scan().  Move it to a pageable
         * queue, so that it can eventually be reclaimed, instead
         * of lingering in the "empty" object.
         */
        if (queues_lock_held == FALSE)
            vm_page_lockspin_queues();
        vm_page_deactivate(mem);
        if (queues_lock_held == FALSE)
            vm_page_unlock_queues();
    }
}
/*
 *  vm_page_replace:
 *
 *  Exactly like vm_page_insert, except that we first
 *  remove any existing page at the given offset in object.
 *
 *  The object must be locked.
 */
void
vm_page_replace(
    register vm_page_t          mem,
    register vm_object_t        object,
    register vm_object_offset_t offset)
{
    vm_page_bucket_t *bucket;
    vm_page_t    found_m = VM_PAGE_NULL;
    lck_spin_t  *bucket_lock;
    int         hash_id;

    vm_object_lock_assert_exclusive(object);
    if (mem->tabled || mem->object != VM_OBJECT_NULL)
        panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
              "already in (obj=%p,off=0x%llx)",
              mem, object, offset, mem->object, mem->offset);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

    /*
     *  Record the object/offset pair in this page
     */

    mem->object = object;
    mem->offset = offset;

    /*
     *  Insert it into the object_object/offset hash table,
     *  replacing any page that might have been there.
     */

    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    if (bucket->pages) {
        vm_page_t   *mp = &bucket->pages;
        vm_page_t   m = *mp;

        do {
            if (m->object == object && m->offset == offset) {
                /*
                 * Remove old page from hash list
                 */
                *mp = m->next;

                found_m = m;
                break;
            }
            mp = &m->next;
        } while ((m = *mp));

        mem->next = bucket->pages;
    } else {
        mem->next = VM_PAGE_NULL;
    }
    /*
     * insert new page at head of hash list
     */
    bucket->pages = mem;

    lck_spin_unlock(bucket_lock);

    if (found_m) {
        /*
         * there was already a page at the specified
         * offset for this object... remove it from
         * the object and free it back to the free list
         */
        vm_page_free_unlocked(found_m, FALSE);
    }
    vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
}
/*
 *  vm_page_remove:     [ internal use only ]
 *
 *  Removes the given mem entry from the object/offset-page
 *  table and the object page list.
 *
 *  The object must be locked.
 */

void
vm_page_remove(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    vm_page_bucket_t *bucket;
    vm_page_t   this;
    lck_spin_t  *bucket_lock;
    int         hash_id;

    XPR(XPR_VM_PAGE,
        "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
        mem->object, mem->offset,
        mem, 0,0);

    vm_object_lock_assert_exclusive(mem->object);
    assert(mem->tabled);
    assert(!mem->cleaning);

    if (remove_from_hash == TRUE) {
        /*
         *  Remove from the object_object/offset hash table
         */
        hash_id = vm_page_hash(mem->object, mem->offset);
        bucket = &vm_page_buckets[hash_id];
        bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

        lck_spin_lock(bucket_lock);

        if ((this = bucket->pages) == mem) {
            /* optimize for common case */

            bucket->pages = mem->next;
        } else {
            vm_page_t   *prev;

            for (prev = &this->next;
                 (this = *prev) != mem;
                 prev = &this->next)
                continue;
            *prev = this->next;
        }
#if MACH_PAGE_HASH_STATS
        bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */

        lck_spin_unlock(bucket_lock);
    }
    /*
     *  Now remove from the object's list of backed pages.
     */

    VM_PAGE_REMOVE(mem);

    /*
     *  And show that the object has one fewer resident
     *  page.
     */

    assert(mem->object->resident_page_count > 0);
    mem->object->resident_page_count--;

    if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
        if (mem->object->resident_page_count == 0)
            vm_object_cache_remove(mem->object);
    }

    if (VM_PAGE_WIRED(mem)) {
        assert(mem->object->wired_page_count > 0);
        mem->object->wired_page_count--;
    }
    assert(mem->object->resident_page_count >=
           mem->object->wired_page_count);
    if (mem->reusable) {
        assert(mem->object->reusable_page_count > 0);
        mem->object->reusable_page_count--;
        assert(mem->object->reusable_page_count <=
               mem->object->resident_page_count);
        mem->reusable = FALSE;
        OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
        vm_page_stats_reusable.reused_remove++;
    } else if (mem->object->all_reusable) {
        OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
        vm_page_stats_reusable.reused_remove++;
    }

    if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
        if (VM_PAGE_WIRED(mem)) {
            assert(vm_page_purgeable_wired_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_wired_count);
        } else {
            assert(vm_page_purgeable_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_count);
        }
    }
    if (mem->object->set_cache_attr == TRUE)
        pmap_set_cache_attributes(mem->phys_page, 0);

    mem->tabled = FALSE;
    mem->object = VM_OBJECT_NULL;
    mem->offset = (vm_object_offset_t) -1;
}
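
/*
 * Illustrative sketch (not compiled): the hash-chain unlink performed in
 * vm_page_remove() above, rewritten with a single "pointer to the previous
 * link" walk so that removing the head and removing an interior page are
 * the same store.  "example_chain_unlink" is a hypothetical helper.
 */
#if 0
static void
example_chain_unlink(vm_page_t *head, vm_page_t victim)
{
    vm_page_t   *prev;

    for (prev = head; *prev != VM_PAGE_NULL; prev = &(*prev)->next) {
        if (*prev == victim) {
            *prev = victim->next;           /* splice it out */
            victim->next = VM_PAGE_NULL;
            return;
        }
    }
}
#endif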
/*
 *  vm_page_lookup:
 *
 *  Returns the page associated with the object/offset
 *  pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *  The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;

vm_page_t
vm_page_lookup(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    vm_page_t       mem;
    vm_page_bucket_t *bucket;
    queue_entry_t   qe;
    lck_spin_t      *bucket_lock;
    int             hash_id;

    vm_object_lock_assert_held(object);
    mem = object->memq_hint;

    if (mem != VM_PAGE_NULL) {
        assert(mem->object == object);

        if (mem->offset == offset) {
            vm_page_lookup_hint++;
            return mem;
        }
        qe = queue_next(&mem->listq);

        if (! queue_end(&object->memq, qe)) {
            vm_page_t   next_page;

            next_page = (vm_page_t) qe;
            assert(next_page->object == object);

            if (next_page->offset == offset) {
                vm_page_lookup_hint_next++;
                object->memq_hint = next_page; /* new hint */
                return next_page;
            }
        }
        qe = queue_prev(&mem->listq);

        if (! queue_end(&object->memq, qe)) {
            vm_page_t prev_page;

            prev_page = (vm_page_t) qe;
            assert(prev_page->object == object);

            if (prev_page->offset == offset) {
                vm_page_lookup_hint_prev++;
                object->memq_hint = prev_page; /* new hint */
                return prev_page;
            }
        }
    }
    /*
     * Search the hash table for this object/offset pair
     */
    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];

    /*
     * since we hold the object lock, we are guaranteed that no
     * new pages can be inserted into this object... this in turn
     * guarantees that the page we're looking for can't exist
     * if the bucket it hashes to is currently NULL even when looked
     * at outside the scope of the hash bucket lock... this is a
     * really cheap optimization to avoid taking the lock
     */
    if (bucket->pages == VM_PAGE_NULL) {
        vm_page_lookup_bucket_NULL++;

        return (VM_PAGE_NULL);
    }
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
        if ((mem->object == object) && (mem->offset == offset))
            break;
    }
    lck_spin_unlock(bucket_lock);

    if (mem != VM_PAGE_NULL) {
        if (object->memq_hint != VM_PAGE_NULL) {
            vm_page_lookup_hint_miss++;
        }
        assert(mem->object == object);
        object->memq_hint = mem;
    } else {
        vm_page_lookup_miss++;
    }

    return(mem);
}
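
/*
 * Illustrative usage sketch (not compiled): vm_page_lookup() must be called
 * with the object lock held, and a VM_PAGE_NULL return simply means no page
 * is resident at that offset.  "example_offset_is_resident" is a
 * hypothetical helper; its caller is assumed to already hold the object lock.
 */
#if 0
static boolean_t
example_offset_is_resident(
    vm_object_t         object,     /* locked by the caller */
    vm_object_offset_t  offset)
{
    vm_page_t   p;

    p = vm_page_lookup(object, offset);

    return (p != VM_PAGE_NULL);
}
#endif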
/*
 *  vm_page_rename:
 *
 *  Move the given memory entry from its
 *  current object to the specified target object/offset.
 *
 *  The object must be locked.
 */
void
vm_page_rename(
    register vm_page_t      mem,
    register vm_object_t    new_object,
    vm_object_offset_t      new_offset,
    boolean_t               encrypted_ok)
{
    assert(mem->object != new_object);

    /*
     * The encryption key is based on the page's memory object
     * (aka "pager") and paging offset.  Moving the page to
     * another VM object changes its "pager" and "paging_offset"
     * so it has to be decrypted first, or we would lose the key.
     *
     * One exception is VM object collapsing, where we transfer pages
     * from one backing object to its parent object.  This operation also
     * transfers the paging information, so the <pager,paging_offset> info
     * should remain consistent.  The caller (vm_object_do_collapse())
     * sets "encrypted_ok" in this case.
     */
    if (!encrypted_ok && mem->encrypted) {
        panic("vm_page_rename: page %p is encrypted\n", mem);
    }

    XPR(XPR_VM_PAGE,
        "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
        new_object, new_offset,
        mem, 0,0);

    /*
     *  Changes to mem->object require the page lock because
     *  the pageout daemon uses that lock to get the object.
     */
    vm_page_lockspin_queues();

    vm_page_remove(mem, TRUE);
    vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);

    vm_page_unlock_queues();
}
/*
 *  vm_page_init:
 *
 *  Initialize the fields in a new page.
 *  This takes a structure with random values and initializes it
 *  so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
    vm_page_t   mem,
    ppnum_t     phys_page,
    boolean_t   lopage)
{
    *mem = vm_page_template;
    mem->phys_page = phys_page;
#if 0
    /*
     * we're leaving this turned off for now... currently pages
     * come off the free list and are either immediately dirtied/referenced
     * due to zero-fill or COW faults, or are used to read or write files...
     * in the file I/O case, the UPL mechanism takes care of clearing
     * the state of the HW ref/mod bits in a somewhat fragile way.
     * Since we may change the way this works in the future (to toughen it up),
     * I'm leaving this as a reminder of where these bits could get cleared
     */

    /*
     * make sure both the h/w referenced and modified bits are
     * clear at this point... we are especially dependent on
     * not finding a 'stale' h/w modified in a number of spots
     * once this page goes back into use
     */
    pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
    mem->lopage = lopage;
}
/*
 *  vm_page_grab_fictitious:
 *
 *  Remove a fictitious page from the free list.
 *  Returns VM_PAGE_NULL if there are no free pages.
 */
int c_vm_page_grab_fictitious = 0;
int c_vm_page_grab_fictitious_failed = 0;
int c_vm_page_release_fictitious = 0;
int c_vm_page_more_fictitious = 0;

static vm_page_t
vm_page_grab_fictitious_common(
    ppnum_t phys_addr)
{
    vm_page_t   m;

    if ((m = (vm_page_t)zget(vm_page_zone))) {

        vm_page_init(m, phys_addr, FALSE);
        m->fictitious = TRUE;

        c_vm_page_grab_fictitious++;
    } else
        c_vm_page_grab_fictitious_failed++;

    return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
    return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
    return vm_page_grab_fictitious_common(vm_page_guard_addr);
}
/*
 *  vm_page_release_fictitious:
 *
 *  Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
    vm_page_t m)
{
    assert(m->fictitious);
    assert(m->phys_page == vm_page_fictitious_addr ||
           m->phys_page == vm_page_guard_addr);

    c_vm_page_release_fictitious++;

    zfree(vm_page_zone, m);
}
/*
 *  vm_page_more_fictitious:
 *
 *  Add more fictitious pages to the zone.
 *  Allowed to block. This routine is way intimate
 *  with the zones code, for several reasons:
 *  1. we need to carve some page structures out of physical
 *     memory before zones work, so they _cannot_ come from
 *     the zone_map.
 *  2. the zone needs to be collectable in order to prevent
 *     growth without bound. These structures are used by
 *     the device pager (by the hundreds and thousands), as
 *     private pages for pageout, and as blocking pages for
 *     pagein. Temporary bursts in demand should not result in
 *     permanent allocation of a resource.
 *  3. To smooth allocation humps, we allocate single pages
 *     with kernel_memory_allocate(), and cram them into the
 *     zone.
 */

void vm_page_more_fictitious(void)
{
    vm_offset_t     addr;
    kern_return_t   retval;

    c_vm_page_more_fictitious++;

    /*
     * Allocate a single page from the zone_map. Do not wait if no physical
     * pages are immediately available, and do not zero the space. We need
     * our own blocking lock here to prevent having multiple,
     * simultaneous requests from piling up on the zone_map lock. Exactly
     * one (of our) threads should be potentially waiting on the map lock.
     * If winner is not vm-privileged, then the page allocation will fail,
     * and it will temporarily block here in the vm_page_wait().
     */
    lck_mtx_lock(&vm_page_alloc_lock);
    /*
     * If another thread allocated space, just bail out now.
     */
    if (zone_free_count(vm_page_zone) > 5) {
        /*
         * The number "5" is a small number that is larger than the
         * number of fictitious pages that any single caller will
         * attempt to allocate. Otherwise, a thread will attempt to
         * acquire a fictitious page (vm_page_grab_fictitious), fail,
         * release all of the resources and locks already acquired,
         * and then call this routine. This routine finds the pages
         * that the caller released, so fails to allocate new space.
         * The process repeats infinitely. The largest known number
         * of fictitious pages required in this manner is 2. 5 is
         * simply a somewhat larger number.
         */
        lck_mtx_unlock(&vm_page_alloc_lock);
        return;
    }

    retval = kernel_memory_allocate(zone_map,
                                    &addr, PAGE_SIZE, VM_PROT_ALL,
                                    KMA_KOBJECT|KMA_NOPAGEWAIT);
    if (retval != KERN_SUCCESS) {
        /*
         * No page was available. Drop the
         * lock to give another thread a chance at it, and
         * wait for the pageout daemon to make progress.
         */
        lck_mtx_unlock(&vm_page_alloc_lock);
        vm_page_wait(THREAD_UNINT);
        return;
    }
    zcram(vm_page_zone, (void *) addr, PAGE_SIZE);

    lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 *  vm_pool_low():
 *
 *  Return true if it is not likely that a non-vm_privileged thread
 *  can get memory without blocking.  Advisory only, since the
 *  situation may change under us.
 */
int
vm_pool_low(void)
{
    /* No locking, at worst we will fib. */
    return( vm_page_free_count <= vm_page_free_reserved );
}

/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int             vm_himemory_mode = 0;

/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int    vm_lopages_allocated_q = 0;
unsigned int    vm_lopages_allocated_cpm_success = 0;
unsigned int    vm_lopages_allocated_cpm_failed = 0;
queue_head_t    vm_lopage_queue_free;
vm_page_t
vm_page_grablo(void)
{
    vm_page_t   mem;

    if (vm_lopage_needed == FALSE)
        return (vm_page_grab());

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if ( !queue_empty(&vm_lopage_queue_free)) {
        queue_remove_first(&vm_lopage_queue_free,
                           mem,
                           vm_page_t,
                           pageq);
        assert(vm_lopage_free_count);

        vm_lopage_free_count--;
        vm_lopages_allocated_q++;

        if (vm_lopage_free_count < vm_lopage_lowater)
            vm_lopage_refill = TRUE;

        lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
        lck_mtx_unlock(&vm_page_queue_free_lock);

        if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

            lck_mtx_lock_spin(&vm_page_queue_free_lock);
            vm_lopages_allocated_cpm_failed++;
            lck_mtx_unlock(&vm_page_queue_free_lock);

            return (VM_PAGE_NULL);
        }
        vm_page_lockspin_queues();

        mem->gobbled = FALSE;
        vm_page_gobble_count--;
        vm_page_wire_count--;

        vm_lopages_allocated_cpm_success++;
        vm_page_unlock_queues();
    }
    assert(!mem->pmapped);
    assert(!mem->wpmapped);

    mem->pageq.next = NULL;
    mem->pageq.prev = NULL;

    return (mem);
}
/*
 *  vm_page_grab:
 *
 *  first try to grab a page from the per-cpu free list...
 *  this must be done while pre-emption is disabled... if
 *  a page is available, we're done...
 *  if no page is available, grab the vm_page_queue_free_lock
 *  and see if current number of free pages would allow us
 *  to grab at least 1... if not, return VM_PAGE_NULL as before...
 *  if there are pages available, disable preemption and
 *  recheck the state of the per-cpu free list... we could
 *  have been preempted and moved to a different cpu, or
 *  some other thread could have re-filled it... if still
 *  empty, figure out how many pages we can steal from the
 *  global free queue and move to the per-cpu queue...
 *  return 1 of these pages when done... only wakeup the
 *  pageout_scan thread if we moved pages from the global
 *  list... no need for the wakeup if we've satisfied the
 *  request from the per-cpu queue.
 */

#define COLOR_GROUPS_TO_STEAL   4
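
/*
 * Illustrative sketch (not compiled): the batch-sizing policy described in
 * the comment above -- steal a few color groups' worth of pages from the
 * global free queue for the per-cpu list, but never dip below the reserve.
 * "example_pages_to_steal" is a hypothetical helper written only to make
 * the policy explicit.
 */
#if 0
static unsigned int
example_pages_to_steal(
    unsigned int    free_count,
    unsigned int    free_reserved,
    unsigned int    colors)
{
    unsigned int    want;

    if (free_count <= free_reserved)
        return 1;           /* privileged caller, take the minimum */

    want = COLOR_GROUPS_TO_STEAL * colors;
    if (want > (free_count - free_reserved))
        want = (free_count - free_reserved);
    return want;
}
#endif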
vm_page_t
vm_page_grab( void )
{
    vm_page_t   mem;


    disable_preemption();

    if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
        PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
        mem->pageq.next = NULL;

        enable_preemption();

        assert(mem->listq.next == NULL && mem->listq.prev == NULL);
        assert(mem->tabled == FALSE);
        assert(mem->object == VM_OBJECT_NULL);
        assert(!mem->laundry);
        assert(pmap_verify_free(mem->phys_page));
        assert(!mem->encrypted);
        assert(!mem->pmapped);
        assert(!mem->wpmapped);
        assert(!mem->active);
        assert(!mem->inactive);
        assert(!mem->throttled);
        assert(!mem->speculative);

        return mem;
    }
    enable_preemption();

    /*
     *  Optionally produce warnings if the wire or gobble
     *  counts exceed some threshold.
     */
    if (vm_page_wire_count_warning > 0
        && vm_page_wire_count >= vm_page_wire_count_warning) {
        printf("mk: vm_page_grab(): high wired page count of %d\n",
            vm_page_wire_count);
        assert(vm_page_wire_count < vm_page_wire_count_warning);
    }
    if (vm_page_gobble_count_warning > 0
        && vm_page_gobble_count >= vm_page_gobble_count_warning) {
        printf("mk: vm_page_grab(): high gobbled page count of %d\n",
            vm_page_gobble_count);
        assert(vm_page_gobble_count < vm_page_gobble_count_warning);
    }

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    /*
     *  Only let privileged threads (involved in pageout)
     *  dip into the reserved pool.
     */
    if ((vm_page_free_count < vm_page_free_reserved) &&
        !(current_thread()->options & TH_OPT_VMPRIV)) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        mem = VM_PAGE_NULL;
    }
    else {
        vm_page_t       head;
        vm_page_t       tail;
        unsigned int    pages_to_steal;
        unsigned int    color;

        while ( vm_page_free_count == 0 ) {

            lck_mtx_unlock(&vm_page_queue_free_lock);
            /*
             * must be a privileged thread to be
             * in this state since a non-privileged
             * thread would have bailed if we were
             * under the vm_page_free_reserved mark
             */
            VM_PAGE_WAIT();
            lck_mtx_lock_spin(&vm_page_queue_free_lock);
        }

        disable_preemption();

        if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
            lck_mtx_unlock(&vm_page_queue_free_lock);

            /*
             * we got preempted and moved to another processor
             * or we got preempted and someone else ran and filled the cache
             */
            goto return_page_from_cpu_list;
        }
        if (vm_page_free_count <= vm_page_free_reserved)
            pages_to_steal = 1;
        else
            pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;

        if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
            pages_to_steal = (vm_page_free_count - vm_page_free_reserved);

        color = PROCESSOR_DATA(current_processor(), start_color);
        head = tail = NULL;

        while (pages_to_steal--) {
            if (--vm_page_free_count < vm_page_free_count_minimum)
                vm_page_free_count_minimum = vm_page_free_count;

            while (queue_empty(&vm_page_queue_free[color]))
                color = (color + 1) & vm_color_mask;

            queue_remove_first(&vm_page_queue_free[color],
                               mem,
                               vm_page_t,
                               pageq);
            mem->pageq.next = NULL;
            mem->pageq.prev = NULL;

            assert(!mem->active);
            assert(!mem->inactive);
            assert(!mem->throttled);
            assert(!mem->speculative);

            color = (color + 1) & vm_color_mask;

            if (head == NULL)
                head = mem;
            else
                tail->pageq.next = (queue_t)mem;
            tail = mem;

            mem->pageq.prev = NULL;
            assert(mem->listq.next == NULL && mem->listq.prev == NULL);
            assert(mem->tabled == FALSE);
            assert(mem->object == VM_OBJECT_NULL);
            assert(!mem->laundry);
            assert(pmap_verify_free(mem->phys_page));
            assert(!mem->encrypted);
            assert(!mem->pmapped);
            assert(!mem->wpmapped);
        }
        PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
        PROCESSOR_DATA(current_processor(), start_color) = color;

        /*
         * satisfy this request
         */
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
        mem = head;
        mem->pageq.next = NULL;

        lck_mtx_unlock(&vm_page_queue_free_lock);

        enable_preemption();
    }
    /*
     *  Decide if we should poke the pageout daemon.
     *  We do this if the free count is less than the low
     *  water mark, or if the free count is less than the high
     *  water mark (but above the low water mark) and the inactive
     *  count is less than its target.
     *
     *  We don't have the counts locked ... if they change a little,
     *  it doesn't really matter.
     */
    if ((vm_page_free_count < vm_page_free_min) ||
         ((vm_page_free_count < vm_page_free_target) &&
          ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    VM_CHECK_MEMORYSTATUS;

//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);  /* (TEST/DEBUG) */

    return mem;
}
/*
 *  vm_page_release:
 *
 *  Return a page to the free list.
 */

void
vm_page_release(
    register vm_page_t  mem)
{
    unsigned int    color;
    int     need_wakeup = 0;
    int     need_priv_wakeup = 0;

    assert(!mem->private && !mem->fictitious);
    if (vm_page_free_verify) {
        assert(pmap_verify_free(mem->phys_page));
    }
//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);  /* (TEST/DEBUG) */

    lck_mtx_lock_spin(&vm_page_queue_free_lock);
    if (mem->free)
        panic("vm_page_release");

    assert(!mem->laundry);
    assert(mem->object == VM_OBJECT_NULL);
    assert(mem->pageq.next == NULL &&
           mem->pageq.prev == NULL);
    assert(mem->listq.next == NULL &&
           mem->listq.prev == NULL);

    if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
        vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        /*
         * this exists to support hardware controllers
         * incapable of generating DMAs with more than 32 bits
         * of address on platforms with physical memory > 4G...
         */
        queue_enter_first(&vm_lopage_queue_free,
                          mem,
                          vm_page_t,
                          pageq);
        vm_lopage_free_count++;

        if (vm_lopage_free_count >= vm_lopage_free_limit)
            vm_lopage_refill = FALSE;

        mem->lopage = TRUE;
    } else {
        mem->lopage = FALSE;
        mem->free = TRUE;

        color = mem->phys_page & vm_color_mask;
        queue_enter_first(&vm_page_queue_free[color],
                          mem,
                          vm_page_t,
                          pageq);
        vm_page_free_count++;
        /*
         *  Check if we should wake up someone waiting for page.
         *  But don't bother waking them unless they can allocate.
         *
         *  We wakeup only one thread, to prevent starvation.
         *  Because the scheduling system handles wait queues FIFO,
         *  if we wakeup all waiting threads, one greedy thread
         *  can starve multiple niceguy threads.  When the threads
         *  all wakeup, the greedy thread runs first, grabs the page,
         *  and waits for another page.  It will be the first to run
         *  when the next page is freed.
         *
         *  However, there is a slight danger here.
         *  The thread we wake might not use the free page.
         *  Then the other threads could wait indefinitely
         *  while the page goes unused.  To forestall this,
         *  the pageout daemon will keep making free pages
         *  as long as vm_page_free_wanted is non-zero.
         */

        assert(vm_page_free_count > 0);
        if (vm_page_free_wanted_privileged > 0) {
            vm_page_free_wanted_privileged--;
            need_priv_wakeup = 1;
        } else if (vm_page_free_wanted > 0 &&
                   vm_page_free_count > vm_page_free_reserved) {
            vm_page_free_wanted--;
            need_wakeup = 1;
        }
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);

    if (need_priv_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
    else if (need_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_count);

    VM_CHECK_MEMORYSTATUS;
}
/*
 *  vm_page_wait:
 *
 *  Wait for a page to become available.
 *  If there are plenty of free pages, then we don't sleep.
 *
 *  Returns:
 *      TRUE:  There may be another page, try again
 *      FALSE: We were interrupted out of our wait, don't try again
 */

boolean_t
vm_page_wait(
    int interruptible )
{
    /*
     *  We can't use vm_page_free_reserved to make this
     *  determination.  Consider: some thread might
     *  need to allocate two pages.  The first allocation
     *  succeeds, the second fails.  After the first page is freed,
     *  a call to vm_page_wait must really block.
     */
    kern_return_t   wait_result;
    int             need_wakeup = 0;
    int             is_privileged = current_thread()->options & TH_OPT_VMPRIV;

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if (is_privileged && vm_page_free_count) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
    if (vm_page_free_count < vm_page_free_target) {

        if (is_privileged) {
            if (vm_page_free_wanted_privileged++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
        } else {
            if (vm_page_free_wanted++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
        counter(c_vm_page_wait_block++);

        if (need_wakeup)
            thread_wakeup((event_t)&vm_page_free_wanted);

        if (wait_result == THREAD_WAITING)
            wait_result = thread_block(THREAD_CONTINUE_NULL);

        return(wait_result == THREAD_AWAKENED);
    } else {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
}
/*
 *  vm_page_alloc:
 *
 *  Allocate and return a memory cell associated
 *  with this VM object/offset pair.
 *
 *  Object must be locked.
 */
vm_page_t
vm_page_alloc(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}

vm_page_t
vm_page_alloclo(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grablo();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}

/*
 *  vm_page_alloc_guard:
 *
 *  Allocate a fictitious page which will be used
 *  as a guard page.  The page will be inserted into
 *  the object and returned to the caller.
 */

vm_page_t
vm_page_alloc_guard(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab_guard();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}
= 0;)
2171 * vm_page_free_prepare:
2173 * Removes page from any queue it may be on
2174 * and disassociates it from its VM object.
2176 * Object and page queues must be locked prior to entry.
2179 vm_page_free_prepare(
2182 vm_page_free_prepare_queues(mem
);
2183 vm_page_free_prepare_object(mem
, TRUE
);
2188 vm_page_free_prepare_queues(
2193 assert(!mem
->cleaning
);
2194 assert(!mem
->pageout
);
2196 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2198 panic("vm_page_free: freeing page on free list\n");
2201 vm_object_lock_assert_exclusive(mem
->object
);
2206 * We may have to free a page while it's being laundered
2207 * if we lost its pager (due to a forced unmount, for example).
2208 * We need to call vm_pageout_throttle_up() before removing
2209 * the page from its VM object, so that we can find out on
2210 * which pageout queue the page is on.
2212 vm_pageout_throttle_up(mem
);
2213 counter(++c_laundry_pages_freed
);
2215 VM_PAGE_QUEUES_REMOVE(mem
); /* clears local/active/inactive/throttled/speculative */
2217 if (VM_PAGE_WIRED(mem
)) {
2219 assert(mem
->object
->wired_page_count
> 0);
2220 mem
->object
->wired_page_count
--;
2221 assert(mem
->object
->resident_page_count
>=
2222 mem
->object
->wired_page_count
);
2224 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
2225 OSAddAtomic(+1, &vm_page_purgeable_count
);
2226 assert(vm_page_purgeable_wired_count
> 0);
2227 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
2230 if (!mem
->private && !mem
->fictitious
)
2231 vm_page_wire_count
--;
2232 mem
->wire_count
= 0;
2233 assert(!mem
->gobbled
);
2234 } else if (mem
->gobbled
) {
2235 if (!mem
->private && !mem
->fictitious
)
2236 vm_page_wire_count
--;
2237 vm_page_gobble_count
--;
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		if (mem->zero_fill == TRUE)
			VM_ZF_COUNT_DECR();
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.  We sort the incoming pages into n lists, one for
 *	each color.
 */
void
vm_page_free_list(
	vm_page_t	mem,
	boolean_t	prepare_object)
{
	vm_page_t	nxt;
	int		pg_count = 0;
	int		color;
	int		inuse_list_head = -1;

	queue_head_t	free_list[MAX_COLORS];
	int		inuse[MAX_COLORS];

	for (color = 0; color < (signed) vm_colors; color++) {
		queue_init(&free_list[color]);
	}

	while (mem) {
		assert(!mem->inactive);
		assert(!mem->active);
		assert(!mem->throttled);
		assert(!mem->free);
		assert(!mem->speculative);
		assert(!VM_PAGE_WIRED(mem));
		assert(mem->pageq.prev == NULL);

		nxt = (vm_page_t)(mem->pageq.next);

		if (prepare_object == TRUE)
			vm_page_free_prepare_object(mem, TRUE);

		if (vm_page_free_verify && !mem->fictitious && !mem->private) {
			assert(pmap_verify_free(mem->phys_page));
		}

		if (!mem->fictitious) {
			if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
			    vm_lopage_free_count < vm_lopage_free_limit &&
			    mem->phys_page < max_valid_low_ppnum) {
				mem->pageq.next = NULL;
				vm_page_release(mem);
			} else {
				/*
				 * IMPORTANT: we can't set the page "free" here
				 * because that would make the page eligible for
				 * a physically-contiguous allocation (see
				 * vm_page_find_contiguous()) right away (we don't
				 * hold the vm_page_queue_free lock).  That would
				 * cause trouble because the page is not actually
				 * in the free queue yet...
				 */
				color = mem->phys_page & vm_color_mask;
				if (queue_empty(&free_list[color])) {
					inuse[color] = inuse_list_head;
					inuse_list_head = color;
				}
				queue_enter_first(&free_list[color],
						  mem, vm_page_t, pageq);
				pg_count++;
			}
		} else {
			assert(mem->phys_page == vm_page_fictitious_addr ||
			       mem->phys_page == vm_page_guard_addr);
			vm_page_release_fictitious(mem);
		}
		mem = nxt;
	}
	if (pg_count) {
		unsigned int	avail_free_count;
		unsigned int	need_wakeup = 0;
		unsigned int	need_priv_wakeup = 0;

		lck_mtx_lock_spin(&vm_page_queue_free_lock);

		color = inuse_list_head;

		while( color != -1 ) {
			vm_page_t first, last;
			vm_page_t first_free;

			/*
			 * Now that we hold the vm_page_queue_free lock,
			 * it's safe to mark all pages in our local queue
			 * as "free"...
			 */
			queue_iterate(&free_list[color],
				      mem, vm_page_t, pageq) {
				assert(!mem->free);
				mem->free = TRUE;
			}

			/*
			 * ... and insert our local queue at the head of
			 * the global free queue.
			 */
			first = (vm_page_t) queue_first(&free_list[color]);
			last = (vm_page_t) queue_last(&free_list[color]);
			first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
			if (queue_empty(&vm_page_queue_free[color])) {
				queue_last(&vm_page_queue_free[color]) =
					(queue_entry_t) last;
			} else {
				queue_prev(&first_free->pageq) =
					(queue_entry_t) last;
			}
			queue_first(&vm_page_queue_free[color]) =
				(queue_entry_t) first;
			queue_prev(&first->pageq) =
				(queue_entry_t) &vm_page_queue_free[color];
			queue_next(&last->pageq) =
				(queue_entry_t) first_free;

			/* next color in our local set */
			color = inuse[color];
		}

		vm_page_free_count += pg_count;
		avail_free_count = vm_page_free_count;

		if (vm_page_free_wanted_privileged > 0 &&
		    avail_free_count > 0) {
			if (avail_free_count < vm_page_free_wanted_privileged) {
				need_priv_wakeup = avail_free_count;
				vm_page_free_wanted_privileged -=
					avail_free_count;
				avail_free_count = 0;
			} else {
				need_priv_wakeup = vm_page_free_wanted_privileged;
				vm_page_free_wanted_privileged = 0;
				avail_free_count -=
					need_priv_wakeup;
			}
		}

		if (vm_page_free_wanted > 0 &&
		    avail_free_count > vm_page_free_reserved) {
			unsigned int  available_pages;

			available_pages = (avail_free_count -
					   vm_page_free_reserved);

			if (available_pages >= vm_page_free_wanted) {
				need_wakeup = vm_page_free_wanted;
				vm_page_free_wanted = 0;
			} else {
				need_wakeup = available_pages;
				vm_page_free_wanted -= available_pages;
			}
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (need_priv_wakeup != 0) {
			/*
			 * There shouldn't be that many VM-privileged threads,
			 * so let's wake them all up, even if we don't quite
			 * have enough pages to satisfy them all.
			 */
			thread_wakeup((event_t)&vm_page_free_wanted_privileged);
		}
		if (need_wakeup != 0 && vm_page_free_wanted == 0) {
			/*
			 * We don't expect to have any more waiters
			 * after this, so let's wake them all up at
			 * once.
			 */
			thread_wakeup((event_t) &vm_page_free_count);
		} else for (; need_wakeup != 0; need_wakeup--) {
			/*
			 * Wake up one waiter per page we just released.
			 */
			thread_wakeup_one((event_t) &vm_page_free_count);
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
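
/*
 * Illustrative sketch (not part of the original source): callers of
 * vm_page_free_list() chain pages through pageq.next, NULL terminated,
 * and free the whole batch with one call so the free-list lock is taken
 * only once.  The variable names below are hypothetical.
 */
#if 0	/* example only */
	vm_page_t	local_free_q = VM_PAGE_NULL;
	vm_page_t	m;

	/* ... for each page m being reclaimed ... */
	m->pageq.next = (queue_entry_t) local_free_q;
	local_free_q = m;

	/* ... once the batch is complete ... */
	if (local_free_q != VM_PAGE_NULL)
		vm_page_free_list(local_free_q, TRUE);
#endif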
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	VM_PAGE_CHECK(mem);
	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if ( !VM_PAGE_WIRED(mem)) {
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;
		if (mem->zero_fill == TRUE) {
			mem->zero_fill = FALSE;
			VM_ZF_COUNT_DECR();
		}

		VM_CHECK_MEMORYSTATUS;

		/*
		 * ENCRYPTED SWAP:
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
	VM_PAGE_CHECK(mem);
}
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();
	VM_PAGE_CHECK(mem);

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	VM_PAGE_CHECK(mem);
	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		assert(!mem->laundry);
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
	VM_PAGE_CHECK(mem);
}
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	assert(!m->laundry);
	assert(m->pageq.next == NULL && m->pageq.prev == NULL);

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
	    m->dirty && m->object->internal &&
	    (m->object->purgable == VM_PURGABLE_DENY ||
	     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     m->object->purgable == VM_PURGABLE_VOLATILE)) {
		queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
		m->throttled = TRUE;
		vm_page_throttled_count++;
	} else {
		if (m->object->named && m->object->ref_count == 1) {
			vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
			vm_page_speculative_recreated++;
#endif
		} else {
			VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
		}
	}
}
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */

void
vm_page_activate(
	register vm_page_t	m)
{
	VM_PAGE_CHECK(m);
#ifdef	FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	if (m->private || m->fictitious)
		return;

	if (m->active)
		panic("vm_page_activate: already active");

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		assert(!m->laundry);
		assert(m->pageq.next == NULL && m->pageq.prev == NULL);
		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
	VM_PAGE_CHECK(m);
}
/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
	VM_PAGE_CHECK(m);
}
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}
void
vm_page_lru(
	vm_page_t	m)
{
	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (m->active || m->reference)
		return;

	if (m->private || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	assert(!m->laundry);
	assert(m->pageq.next == NULL && m->pageq.prev == NULL);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			assert(m->throttled);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;

			m->throttled = FALSE;
			m->active = TRUE;
			VM_PAGE_CHECK(m);
		}

		/*
		 * Transfer the entire throttled queue to the regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));

	vm_page_unlock_queues();
}
/*
 * move pages from the indicated local queue to the global active queue.
 * it's OK to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			assert(m->local);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local = FALSE;
			m->active = TRUE;
			VM_PAGE_CHECK(m);

			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to the regular LRU page queues.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		lq->vpl_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
/*
 *	vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{
	vm_page_t	tmp;

	VM_PAGE_CHECK(m);
#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if (m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp,m);
	VM_PAGE_FREE(tmp);
#endif
}
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    m->object, m->offset, m, 0,0);

	VM_PAGE_CHECK(m);

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);

	pmap_copy_part_page(src_m->phys_page, src_pa,
			dst_m->phys_page, dst_pa, len);
}
/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 *
 * ENCRYPTED SWAP:
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m->object, src_m->offset,
	    dest_m->object, dest_m->offset,
	    0);

	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);

	/*
	 * ENCRYPTED SWAP:
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;
		src_m->busy = TRUE;
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
3292 printf("vm_page %p: \n", p
);
3293 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3294 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3295 printf(" next=%p\n", p
->next
);
3296 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3297 printf(" wire_count=%u\n", p
->wire_count
);
3299 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3300 (p
->local
? "" : "!"),
3301 (p
->inactive
? "" : "!"),
3302 (p
->active
? "" : "!"),
3303 (p
->pageout_queue
? "" : "!"),
3304 (p
->speculative
? "" : "!"),
3305 (p
->laundry
? "" : "!"));
3306 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3307 (p
->free
? "" : "!"),
3308 (p
->reference
? "" : "!"),
3309 (p
->gobbled
? "" : "!"),
3310 (p
->private ? "" : "!"),
3311 (p
->throttled
? "" : "!"));
3312 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3313 (p
->busy
? "" : "!"),
3314 (p
->wanted
? "" : "!"),
3315 (p
->tabled
? "" : "!"),
3316 (p
->fictitious
? "" : "!"),
3317 (p
->pmapped
? "" : "!"),
3318 (p
->wpmapped
? "" : "!"));
3319 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3320 (p
->pageout
? "" : "!"),
3321 (p
->absent
? "" : "!"),
3322 (p
->error
? "" : "!"),
3323 (p
->dirty
? "" : "!"),
3324 (p
->cleaning
? "" : "!"),
3325 (p
->precious
? "" : "!"),
3326 (p
->clustered
? "" : "!"));
3327 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3328 (p
->overwriting
? "" : "!"),
3329 (p
->restart
? "" : "!"),
3330 (p
->unusual
? "" : "!"),
3331 (p
->encrypted
? "" : "!"),
3332 (p
->encrypted_cleaning
? "" : "!"));
3333 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3334 (p
->list_req_pending
? "" : "!"),
3335 (p
->dump_cleaning
? "" : "!"),
3336 (p
->cs_validated
? "" : "!"),
3337 (p
->cs_tainted
? "" : "!"),
3338 (p
->no_cache
? "" : "!"));
3339 printf(" %szero_fill\n",
3340 (p
->zero_fill
? "" : "!"));
3342 printf("phys_page=0x%x\n", p
->phys_page
);
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return 1;
}
/*
 *	Check the free lists for proper length etc.
 */
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m, vm_page_t, pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( ! m->busy )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
		}
		if ( ! m->free )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
			      color, npages, m);
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}
static boolean_t vm_page_verify_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;

	if (! vm_page_verify_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists:  "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
	}
}
#endif	/* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion.
 *	We assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop which
 *	steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()		\
	MACRO_BEGIN			\
	prevcontaddr = -2;		\
	start_pnum = -1;		\
	free_considered = 0;		\
	substitute_needed = 0;		\
	npages = 0;			\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
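
/*
 * Illustrative sketch (not part of the original source): the run detection
 * described above reduces to "extend the run while each page's physical
 * address is exactly previous + 1, otherwise reset".  The toy function
 * below shows that core test on a plain array of page numbers; everything
 * else in vm_page_find_contiguous() is about doing this safely behind the
 * queue/free locks and then stealing the pages.  Names are hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_has_contig_run(ppnum_t *pnums, unsigned int count, unsigned int want)
{
	unsigned int	i, npages = 0;
	ppnum_t		prevcontaddr = (ppnum_t) -2;

	for (i = 0; i < count; i++) {
		if (pnums[i] == prevcontaddr + 1)
			npages++;		/* extend the current run */
		else
			npages = 1;		/* start a new run at this page */
		prevcontaddr = pnums[i];

		if (npages >= want)
			return TRUE;
	}
	return FALSE;
}
#endif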
static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum = 0;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
	int		yielded = 0;
	int		dumped_run = 0;
	int		stolen_pages = 0;

	if (contig_pages == 0)
		return VM_PAGE_NULL;

#if MACH_ASSERT
	vm_page_verify_free_lists();
#endif
	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);

	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	RESET_STATE_OF_RUN();

	scanned = 0;
	considered = 0;
	free_available = vm_page_free_count - vm_page_free_reserved;

	wrapped = FALSE;

	if(flags & KMA_LOMEM)
		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
	else
		idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

	orig_last_idx = idx_last_contig_page_found;
	last_idx = orig_last_idx;

	for (page_idx = last_idx, start_idx = last_idx;
	     npages < contig_pages && page_idx < vm_pages_count;
	     page_idx++) {
retry:
		if (wrapped &&
		    npages == 0 &&
		    page_idx >= orig_last_idx) {
			/*
			 * We're back where we started and we haven't
			 * found any suitable contiguous range.  Let's
			 * give up.
			 */
			break;
		}
		scanned++;
		m = &vm_pages[page_idx];

		assert(!m->fictitious);
		assert(!m->private);

		if (max_pnum && m->phys_page > max_pnum) {
			/* no more low pages... */
			break;
		}
		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
			/*
			 * not aligned
			 */
			RESET_STATE_OF_RUN();

		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
			   m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
			   m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
			   m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
			/*
			 * page is in a transient state
			 * or a state we don't want to deal
			 * with, so don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
			/*
			 * page needs to be on one of our queues
			 * in order for it to be stable behind the
			 * locks we hold at this point...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && (!m->tabled || m->busy)) {
			/*
			 * pages on the free list are always 'busy'
			 * so we couldn't test for 'busy' in the check
			 * for the transient states... pages that are
			 * 'free' are never 'tabled', so we also couldn't
			 * test for 'tabled'.  So we check here to make
			 * sure that a non-free page is not busy and is
			 * tabled on an object...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else {
			if (m->phys_page != prevcontaddr + 1) {
				if ((m->phys_page & pnum_mask) != 0) {
					RESET_STATE_OF_RUN();
					goto did_consider;
				} else {
					npages = 1;
					start_idx = page_idx;
					start_pnum = m->phys_page;
				}
			} else {
				npages++;
			}
			prevcontaddr = m->phys_page;

			if (m->free) {
				free_considered++;
			} else {
				/*
				 * This page is not free.
				 * If we can't steal used pages,
				 * we have to give up this run
				 * and keep looking.
				 * Otherwise, we might need to
				 * move the contents of this page
				 * into a substitute page.
				 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
				if (m->pmapped || m->dirty) {
					substitute_needed++;
				}
#else
				RESET_STATE_OF_RUN();
#endif
			}

			if ((free_considered + substitute_needed) > free_available) {
				/*
				 * if we let this run continue
				 * we will end up dropping the vm_page_free_count
				 * below the reserve limit... we need to abort
				 * this run, but we can at least re-consider this
				 * page... thus the jump back to 'retry'
				 */
				RESET_STATE_OF_RUN();

				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
					considered++;
					goto retry;
				}
				/*
				 * free_available == 0
				 * so can't consider any free pages... if
				 * we went to retry in this case, we'd
				 * get stuck looking at the same page
				 * w/o making any forward progress
				 * we also want to take this path if we've already
				 * reached our limit that controls the lock latency
				 */
			}
		}
did_consider:
		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			vm_page_unlock_queues();

			mutex_pause(0);

			vm_page_lock_queues();
			lck_mtx_lock(&vm_page_queue_free_lock);

			RESET_STATE_OF_RUN();
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			considered = 0;

			yielded++;

			goto retry;
		}
		considered++;
	}
	m = VM_PAGE_NULL;

	if (npages != contig_pages) {
		if (!wrapped) {
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();
			if( flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
			last_idx = 0;
			page_idx = last_idx;
			wrapped = TRUE;
			goto retry;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		vm_page_t	m1;
		vm_page_t	m2;
		unsigned int	cur_idx;
		unsigned int	tmp_start_idx;
		vm_object_t	locked_object = VM_OBJECT_NULL;
		boolean_t	abort_run = FALSE;

		assert(page_idx - start_idx == contig_pages);

		tmp_start_idx = start_idx;

		/*
		 * first pass through to pull the free pages
		 * off of the free queue so that in case we
		 * need substitute pages, we won't grab any
		 * of the free pages in the run... we'll clear
		 * the 'free' bit in the 2nd pass, and even in
		 * an abort_run case, we'll collect all of the
		 * free pages in this run and return them to the free list
		 */
		while (start_idx < page_idx) {

			m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
			assert(m1->free);
#endif

			if (m1->free) {
				unsigned int color;

				color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
#endif
				queue_remove(&vm_page_queue_free[color],
					     m1, vm_page_t, pageq);
				m1->pageq.next = NULL;
				m1->pageq.prev = NULL;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
#endif
				/*
				 * Clear the "free" bit so that this page
				 * does not get considered for another
				 * concurrent physically-contiguous allocation.
				 */
				m1->free = FALSE;
				assert(m1->busy);

				vm_page_free_count--;
			}
		}
		/*
		 * adjust global freelist counts
		 */
		if (vm_page_free_count < vm_page_free_count_minimum)
			vm_page_free_count_minimum = vm_page_free_count;

		if( flags & KMA_LOMEM)
			vm_page_lomem_find_contiguous_last_idx = page_idx;
		else
			vm_page_find_contiguous_last_idx = page_idx;

		/*
		 * we can drop the free queue lock at this point since
		 * we've pulled any 'free' candidates off of the list
		 * we need it dropped so that we can do a vm_page_grab
		 * when substituting for pmapped/dirty pages
		 */
		lck_mtx_unlock(&vm_page_queue_free_lock);

		start_idx = tmp_start_idx;
		cur_idx = page_idx - 1;

		while (start_idx++ < page_idx) {
			/*
			 * must go through the list from back to front
			 * so that the page list is created in the
			 * correct order - low -> high phys addresses
			 */
			m1 = &vm_pages[cur_idx--];

			assert(!m1->free);
			if (m1->object == VM_OBJECT_NULL) {
				/*
				 * page has already been removed from
				 * the free list in the 1st pass
				 */
				assert(m1->offset == (vm_object_offset_t) -1);
				assert(m1->busy);
				assert(!m1->wanted);
				assert(!m1->laundry);
			} else {
				vm_object_t object;

				if (abort_run == TRUE)
					continue;

				object = m1->object;

				if (object != locked_object) {
					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					if (vm_object_lock_try(object))
						locked_object = object;
				}
				if (locked_object == VM_OBJECT_NULL ||
				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
				     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
				     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
				     m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {

					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					tmp_start_idx = cur_idx;
					abort_run = TRUE;
					continue;
				}
				if (m1->pmapped || m1->dirty) {
					int refmod;
					vm_object_offset_t offset;

					m2 = vm_page_grab();

					if (m2 == VM_PAGE_NULL) {
						if (locked_object) {
							vm_object_unlock(locked_object);
							locked_object = VM_OBJECT_NULL;
						}
						tmp_start_idx = cur_idx;
						abort_run = TRUE;
						continue;
					}
					if (m1->pmapped)
						refmod = pmap_disconnect(m1->phys_page);
					else
						refmod = 0;
					vm_page_copy(m1, m2);

					m2->reference = m1->reference;
					m2->dirty     = m1->dirty;

					if (refmod & VM_MEM_REFERENCED)
						m2->reference = TRUE;
					if (refmod & VM_MEM_MODIFIED)
						m2->dirty = TRUE;
					offset = m1->offset;

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);

					/*
					 * make sure we clear the ref/mod state
					 * from the pmap layer... else we risk
					 * inheriting state from the last time
					 * this page was used...
					 */
					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
					/*
					 * now put the substitute page on the object
					 */
					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);

					if (m2->reference)
						vm_page_activate(m2);
					else
						vm_page_deactivate(m2);

					PAGE_WAKEUP_DONE(m2);

				} else {
					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);
				}
				stolen_pages++;
			}
			m1->pageq.next = (queue_entry_t) m;
			m1->pageq.prev = NULL;
			m = m1;
		}
		if (locked_object) {
			vm_object_unlock(locked_object);
			locked_object = VM_OBJECT_NULL;
		}

		if (abort_run == TRUE) {
			if (m != VM_PAGE_NULL) {
				vm_page_free_list(m, FALSE);
			}
			dumped_run++;

			/*
			 * want the index of the last
			 * page in this run that was
			 * successfully 'stolen', so back
			 * it up 1 for the auto-decrement on use
			 * and 1 more to bump back over this page
			 */
			page_idx = tmp_start_idx + 2;
			if (page_idx >= vm_pages_count) {
				if (wrapped)
					goto done_scanning;
				page_idx = last_idx = 0;
				wrapped = TRUE;
			}
			abort_run = FALSE;

			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();

			if( flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

			last_idx = page_idx;

			lck_mtx_lock(&vm_page_queue_free_lock);
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			goto retry;
		}

		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {

			if (wire == TRUE)
				m1->wire_count++;
			else
				m1->gobbled = TRUE;
		}
		if (wire == FALSE)
			vm_page_gobble_count += npages;

		/*
		 * gobbled pages are also counted as wired pages
		 */
		vm_page_wire_count += npages;

		assert(vm_page_verify_contiguous(m, npages));
	}
done_scanning:
	vm_page_unlock_queues();

	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

	tv_end_sec -= tv_start_sec;
	if (tv_end_usec < tv_start_usec) {
		tv_end_sec--;
		tv_end_usec += 1000000;
	}
	tv_end_usec -= tv_start_usec;
	if (tv_end_usec >= 1000000) {
		tv_end_sec++;
		tv_end_usec -= 1000000;
	}
	if (vm_page_find_contig_debug) {
		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds...  started at %d...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
		       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
		       (long)tv_end_sec, tv_end_usec, orig_last_idx,
		       scanned, yielded, dumped_run, stolen_pages);
	}

#if MACH_ASSERT
	vm_page_verify_free_lists();
#endif
	return m;
}
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	pages;
	unsigned int	npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
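
/*
 * Illustrative sketch (not part of the original source): a caller needing
 * physically contiguous, wired memory sizes the request in bytes and walks
 * the returned page list with NEXT_PAGE().  The variable names and chosen
 * arguments are hypothetical.
 */
#if 0	/* example only */
	vm_page_t	pages;
	kern_return_t	kr;

	kr = cpm_allocate(4 * PAGE_SIZE,	/* size, must be page aligned        */
			  &pages,		/* out: list ordered by phys address */
			  0,			/* max_pnum: 0 == no upper limit     */
			  0,			/* pnum_mask: no alignment required  */
			  TRUE,			/* wire the pages                    */
			  0);			/* flags (e.g. KMA_LOMEM)            */
	if (kr == KERN_SUCCESS) {
		vm_page_t m;

		for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
			/* m->phys_page ascends by exactly 1 per page */
		}
	}
#endif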
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * code that applies them.
 */

void
vm_page_do_delayed_work(
	vm_object_t	object,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	boolean_t	dropped_obj_lock = FALSE;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
		dropped_obj_lock = TRUE;
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_set_list_req_pending) {
			m->list_req_pending = TRUE;

			if (dropped_obj_lock == TRUE) {
				/*
				 * need to make sure anyone that might have
				 * blocked on busy == TRUE when we dropped
				 * the object lock gets a chance to re-evaluate
				 * its state since we have several places
				 * where we avoid potential deadlocks with
				 * the filesystem by stealing pages with
				 * list_req_pending == TRUE and busy == TRUE
				 */
				dwp->dw_mask |= DW_PAGE_WAKEUP;
			}
		}
		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);

		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
				VM_PAGE_QUEUES_REMOVE(m);

			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				VM_PAGE_QUEUES_REMOVE(m);

				assert(!m->laundry);
				assert(m->object != kernel_object);
				assert(m->pageq.next == NULL &&
				       m->pageq.prev == NULL);

				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
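
/*
 * Illustrative sketch (not part of the original source): the 2 pass scheme
 * above has the caller batch up per-page operations while only the object
 * lock is held, then apply them under the page-queue lock in one shot.  The
 * dw_mask bits are the real DW_* flags handled by vm_page_do_delayed_work();
 * the array and variable names here are hypothetical.
 */
#if 0	/* example only */
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	int				dw_count = 0;

	/* pass 1: object lock held, record what should happen to this page */
	dw_array[dw_count].dw_m = m;
	dw_array[dw_count].dw_mask = DW_clear_reference | DW_move_page;
	dw_count++;

	/* pass 2: take the page-queue lock once for the whole batch */
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
#endif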
void vm_check_memorystatus()
{
	static boolean_t in_critical = FALSE;
	static unsigned int last_memorystatus = 0;
	unsigned int pages_avail;

	if (!kern_memorystatus_delta) {
		return;
	}

	pages_avail = (vm_page_active_count +
		       vm_page_inactive_count +
		       vm_page_speculative_count +
		       vm_page_free_count +
		       (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
	if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
	     (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
	     (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
		kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
		last_memorystatus = pages_avail;

		thread_wakeup((event_t)&kern_memorystatus_wakeup);

		in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
	}
}
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(void);
static int  hibernate_flush_queue(queue_head_t *, int);
static void hibernate_dirty_page(vm_page_t);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);


struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;

	int cd_found_precious;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;

	int cd_vm_page_wire_count;
} hibernate_stats;
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while (q->pgo_laundry) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT) {
			hibernate_stats.hibernate_drain_timeout++;
			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}
static void
hibernate_dirty_page(vm_page_t m)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	vm_object_lock_assert_exclusive(object);

	/*
	 * protect the object from collapse -
	 * locking in the object's paging_offset.
	 */
	vm_object_paging_begin(object);

	m->list_req_pending = TRUE;
	m->cleaning = TRUE;
	m->busy = TRUE;

	if (object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	/*
	 * pgo_laundry count is tied to the laundry bit
	 */
	m->laundry = TRUE;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}
}
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			}
			l_object = m_object;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if ( !m_object->pager_initialized && m_object->pager_created)
			goto reenter_pg_on_q;

		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED))
				m->dirty = TRUE;
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			tq->pgo_throttled = TRUE;

			while (retval == 0) {

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (hibernate_should_abort())
					retval = 1;
				else {
					if (wait_result != THREAD_TIMED_OUT)
						break;

					if (--wait_count == 0) {
						hibernate_stats.hibernate_throttle_timeout++;
						retval = 1;
					}
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		VM_PAGE_QUEUES_REMOVE(m);

		hibernate_dirty_page(m);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
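
/*
 * Illustrative sketch (not part of the original file): the throttle
 * handling above, reduced to its skeleton.  When the chosen pageout
 * queue is full, hibernate_flush_queue() repeatedly arms a one second
 * timeout on the queue's laundry event and blocks; a wakeup before the
 * timeout means the pageout thread made progress.  The helper name and
 * the max_tries bound are hypothetical; the primitives are the same
 * ones used above, minus the page-queues locking.
 */
static int
hibernate_example_wait_for_laundry(struct vm_pageout_queue *tq, int max_tries)
{
	wait_result_t	wait_result;

	while (max_tries-- > 0) {
		/* arm a 1000 ms timeout on the laundry-completion event */
		assert_wait_timeout((event_t) &tq->pgo_laundry,
				    THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result != THREAD_TIMED_OUT)
			return (0);	/* woken: room on the queue again */
	}
	return (1);			/* timed out; caller gives up */
}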
static int
hibernate_flush_dirty_pages()
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
		return (1);

	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);
	return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
}
extern void IOSleep(unsigned int);
extern int sync_internal(void);

int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	IOSleep(2 * 1000);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);

	if ((retval = hibernate_flush_dirty_pages()) == 0) {
		if (consider_buffer_cache_collect != NULL) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);

			sync_internal();
			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(1);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t		bank;
	hibernate_bitmap_t *	bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
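
/*
 * Illustrative sketch (not part of the original file): what the
 * "out-of-bound bits" mask above computes.  When a bank's page count
 * is not a multiple of 32, the final bitmap word covers last_bit real
 * pages and (32 - last_bit) slots past the end of the bank; those
 * trailing slots are pre-set so they are never treated as pages that
 * need saving.  The helper name is hypothetical.
 */
static uint32_t
hibernate_example_tail_word(uint32_t first_page, uint32_t last_page)
{
	uint32_t last_bit = ((last_page - first_page + 1) & 31);

	/*
	 * e.g. a 70 page bank: last_bit == 6, so 6 positions of the last
	 * word remain zero (real pages, still "to be saved") and the
	 * other 26 positions are set (out of range).
	 */
	return (last_bit ? (0xFFFFFFFF >> last_bit) : 0);
}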
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t  i;
	vm_page_t m;
	uint64_t  start, end, timeout, nsec;

	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}
void
hibernate_free_gobble_pages(void)
{
	vm_page_t m, next;
	uint32_t  count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
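
/*
 * Illustrative sketch (not part of the original file): the gobble
 * queue used by the two routines above is simply a singly linked
 * stack threaded through vm_page_t->pageq.next, with
 * hibernate_gobble_queue as its head.  The helper names are
 * hypothetical.
 */
static void
hibernate_example_gobble_push(vm_page_t m)
{
	m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
	hibernate_gobble_queue = m;
}

static vm_page_t
hibernate_example_gobble_pop(void)
{
	vm_page_t m = (vm_page_t) hibernate_gobble_queue;

	if (m != VM_PAGE_NULL)
		hibernate_gobble_queue = (vm_page_t) m->pageq.next;
	return (m);
}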
static boolean_t
hibernate_consider_discard(vm_page_t m)
{
	vm_object_t object = NULL;
	int         refmod_state;
	boolean_t   discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry || m->list_req_pending) {
			hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED)
				m->dirty = TRUE;
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
		    || (VM_PURGABLE_VOLATILE == object->purgable)
		    || (VM_PURGABLE_EMPTY == object->purgable);

		if (discard == FALSE)
			hibernate_stats.cd_found_dirty++;
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
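
/*
 * Illustrative sketch (not part of the original file): the final test
 * above, isolated.  A page that has passed all of the filters is
 * discardable iff it is clean, or belongs to a volatile/empty
 * purgeable object whose contents may be thrown away on wake.  Pages
 * flagged this way are only bit-marked in hibernate_page_list_setall();
 * they are actually freed later by hibernate_page_list_discard().
 * The helper name is hypothetical.
 */
static boolean_t
hibernate_example_is_discardable(vm_page_t m, vm_object_t object)
{
	return (!m->dirty)
	    || (VM_PURGABLE_VOLATILE == object->purgable)
	    || (VM_PURGABLE_EMPTY == object->purgable);
}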
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		assert(old_queue);
		/* No need to lock page queue for token delete, hibernate_vm_unlock()
		   makes sure these locks are uncontended before sleep */
		vm_purgeable_token_delete_first(old_queue);
		m->object->purgable = VM_PURGABLE_EMPTY;
	}

	vm_page_free(m);
}
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   uint32_t * pagesOut)
{
	uint64_t start, end, nsec;
	vm_page_t m;
	uint32_t pages = page_list->page_count;
	uint32_t count_zf = 0, count_throttled = 0;
	uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
	uint32_t count_wire = pages;
	uint32_t count_discard_active    = 0;
	uint32_t count_discard_inactive  = 0;
	uint32_t count_discard_purgeable = 0;
	uint32_t count_discard_speculative = 0;
	uint32_t i;
	uint32_t bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;

	HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	hibernate_page_list_zero(page_list);
	hibernate_page_list_zero(page_list_wired);
	hibernate_page_list_zero(page_list_pal);

	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
	hibernate_stats.cd_pages = pages;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, TRUE);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		m = (vm_page_t) m->pageq.next;
	}

	for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list,       TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			hibernate_page_bitset(page_list,       TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		hibernate_stats.cd_total_free++;
	}

	queue_iterate( &vm_page_queue_throttled,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
		}
		else
			count_throttled++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_zf,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_zf++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_inactive,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_inactive++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		queue_iterate(&vm_page_queue_speculative[i].age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m))
			{
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
			}
			else
				count_speculative++;
			count_wire--;
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
	}

	queue_iterate( &vm_page_queue_active,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
		}
		else
			count_active++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	// pull wired from hibernate_bitmap

	bitmap = &page_list->bank_bitmap[0];
	bitmap_wired = &page_list_wired->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
		for (i = 0; i < bitmap->bitmapwords; i++)
			bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
		bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
		bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);

	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
	       pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
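
/*
 * Illustrative sketch (not part of the original file): after the wired
 * bits have been pulled out above, every physical page ends up in at
 * most one image list - zero bits in page_list are the pageable pages
 * to save, zero bits in page_list_wired are the wired pages to save.
 * The helper name is hypothetical; it only re-uses
 * hibernate_page_bittst(), which this file already relies on.
 */
static boolean_t
hibernate_example_is_pageable_save(hibernate_page_list_t * page_list,
				   hibernate_page_list_t * page_list_wired,
				   uint32_t phys_page)
{
	/* a zero (clear) bit means the page must be written to the image */
	if (hibernate_page_bittst(page_list, phys_page))
		return (FALSE);		/* not part of the pageable image */
	/*
	 * the loop above OR'ed ~wired into page_list, so any page still
	 * clear here must be marked (set) in the wired list, i.e. it is
	 * saved exactly once, via the pageable path.
	 */
	assert(hibernate_page_bittst(page_list_wired, phys_page));
	return (TRUE);
}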
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t  start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t  i;
	uint32_t  count_discard_active    = 0;
	uint32_t  count_discard_inactive  = 0;
	uint32_t  count_discard_purgeable = 0;
	uint32_t  count_discard_speculative = 0;

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_zf);
	while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
}
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t	*info,
	unsigned int		count)
{
	unsigned int	i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
#include <mach_kdb.h>
#if	MACH_KDB

#include <ddb/db_output.h>
#include <vm/vm_print.h>
#define	printf	kdbprintf

/*
 *	Routine:	vm_page_print		[exported]
 */
void
vm_page_print(
	db_addr_t	db_addr)
{
	vm_page_t	p;

	p = (vm_page_t) (long) db_addr;

	iprintf("page 0x%x\n", p);

	db_indent += 2;

	iprintf("object=0x%x", p->object);
	printf(", offset=0x%x", p->offset);
	printf(", wire_count=%d", p->wire_count);

	iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
		(p->local ? "" : "!"),
		(p->inactive ? "" : "!"),
		(p->active ? "" : "!"),
		(p->throttled ? "" : "!"),
		(p->gobbled ? "" : "!"),
		(p->laundry ? "" : "!"),
		(p->free ? "" : "!"),
		(p->reference ? "" : "!"),
		(p->encrypted ? "" : "!"));
	iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
		(p->busy ? "" : "!"),
		(p->wanted ? "" : "!"),
		(p->tabled ? "" : "!"),
		(p->fictitious ? "" : "!"),
		(p->private ? "" : "!"),
		(p->precious ? "" : "!"));
	iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
		(p->absent ? "" : "!"),
		(p->error ? "" : "!"),
		(p->dirty ? "" : "!"),
		(p->cleaning ? "" : "!"),
		(p->pageout ? "" : "!"),
		(p->clustered ? "" : "!"));
	iprintf("%soverwriting, %srestart, %sunusual\n",
		(p->overwriting ? "" : "!"),
		(p->restart ? "" : "!"),
		(p->unusual ? "" : "!"));

	iprintf("phys_page=0x%x", p->phys_page);
	printf("\n");

	db_indent -= 2;
}
#endif	/* MACH_KDB */