/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>

#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>

#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical table.]
 */
typedef struct {
	vm_page_t	pages;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;
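
/*
 * Illustrative note (not part of the original source): the hash buckets are
 * striped across a smaller array of spin locks, BUCKETS_PER_LOCK buckets per
 * lock.  The sizing and indexing expressions used later in vm_page_bootstrap()
 * and vm_page_insert_internal() are:
 *
 *	vm_page_bucket_lock_count =
 *		(vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */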
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	functions.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;

/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	them into the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
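
/*
 * Illustrative note (not part of the original source): code that must tell
 * real pages apart from these placeholders compares phys_page against the
 * two sentinels, as vm_page_init() does further below:
 *
 *	if ((phys_page != vm_page_fictitious_addr) &&
 *	    (phys_page != vm_page_guard_addr)) {
 *		if (!pmap_valid_page(phys_page))
 *			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
 *	}
 */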
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;

/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_mask = page_size - 1;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2 */
	if ( ( n & (n - 1)) != 0 )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;
}
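
/*
 * Illustrative note (not part of the original source): with vm_colors a
 * power of two and vm_color_mask == vm_colors - 1, a page's free-queue
 * color is just the low bits of its physical page number, as the release
 * path below computes it (argument list shown as assumed for the standard
 * queue_enter_first() macro):
 *
 *	color = mem->phys_page & vm_color_mask;
 *	queue_enter_first(&vm_page_queue_free[color], mem, vm_page_t, pageq);
 */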
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;
__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
}
void
vm_page_init_local_q()
{
	unsigned int	num_cpus;
	unsigned int	i;
	struct vplq	*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
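
/*
 * Illustrative sketch (an assumption, not taken verbatim from this file):
 * consumers of the per-cpu local queues reach their queue with the same
 * layout used in the initialization loop above, e.g.
 *
 *	struct vpl *lq = &vm_page_local_q[cpu_number()].vpl_un.vpl;
 */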
474 * Initializes the resident memory module.
476 * Allocates memory for the page cells, and
477 * for the object/offset-to-page hash table headers.
478 * Each page cell is initialized and placed on the free list.
479 * Returns the range of available kernel virtual memory.
487 register vm_page_t m
;
494 * Initialize the vm_page template.
497 m
= &vm_page_template
;
498 bzero(m
, sizeof (*m
));
500 m
->pageq
.next
= NULL
;
501 m
->pageq
.prev
= NULL
;
502 m
->listq
.next
= NULL
;
503 m
->listq
.prev
= NULL
;
504 m
->next
= VM_PAGE_NULL
;
506 m
->object
= VM_OBJECT_NULL
; /* reset later */
507 m
->offset
= (vm_object_offset_t
) -1; /* reset later */
513 m
->pageout_queue
= FALSE
;
514 m
->speculative
= FALSE
;
517 m
->reference
= FALSE
;
520 m
->throttled
= FALSE
;
521 m
->__unused_pageq_bits
= 0;
523 m
->phys_page
= 0; /* reset later */
528 m
->fictitious
= FALSE
;
537 m
->clustered
= FALSE
;
538 m
->overwriting
= FALSE
;
541 m
->encrypted
= FALSE
;
542 m
->encrypted_cleaning
= FALSE
;
543 m
->cs_validated
= FALSE
;
544 m
->cs_tainted
= FALSE
;
548 m
->was_dirty
= FALSE
;
549 m
->__unused_object_bits
= 0;
553 * Initialize the page queues.
555 vm_page_init_lck_grp();
557 lck_mtx_init_ext(&vm_page_queue_free_lock
, &vm_page_queue_free_lock_ext
, &vm_page_lck_grp_free
, &vm_page_lck_attr
);
558 lck_mtx_init_ext(&vm_page_queue_lock
, &vm_page_queue_lock_ext
, &vm_page_lck_grp_queue
, &vm_page_lck_attr
);
559 lck_mtx_init_ext(&vm_purgeable_queue_lock
, &vm_purgeable_queue_lock_ext
, &vm_page_lck_grp_purge
, &vm_page_lck_attr
);
561 for (i
= 0; i
< PURGEABLE_Q_TYPE_MAX
; i
++) {
564 purgeable_queues
[i
].token_q_head
= 0;
565 purgeable_queues
[i
].token_q_tail
= 0;
566 for (group
= 0; group
< NUM_VOLATILE_GROUPS
; group
++)
567 queue_init(&purgeable_queues
[i
].objq
[group
]);
569 purgeable_queues
[i
].type
= i
;
570 purgeable_queues
[i
].new_pages
= 0;
572 purgeable_queues
[i
].debug_count_tokens
= 0;
573 purgeable_queues
[i
].debug_count_objects
= 0;
577 for (i
= 0; i
< MAX_COLORS
; i
++ )
578 queue_init(&vm_page_queue_free
[i
]);
580 queue_init(&vm_lopage_queue_free
);
581 queue_init(&vm_page_queue_active
);
582 queue_init(&vm_page_queue_inactive
);
583 queue_init(&vm_page_queue_cleaned
);
584 queue_init(&vm_page_queue_throttled
);
585 queue_init(&vm_page_queue_anonymous
);
587 for ( i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++ ) {
588 queue_init(&vm_page_queue_speculative
[i
].age_q
);
590 vm_page_queue_speculative
[i
].age_ts
.tv_sec
= 0;
591 vm_page_queue_speculative
[i
].age_ts
.tv_nsec
= 0;
593 vm_page_free_wanted
= 0;
594 vm_page_free_wanted_privileged
= 0;
596 vm_page_set_colors();
600 * Steal memory for the map and zone subsystems.
603 vm_map_steal_memory();
606 * Allocate (and initialize) the virtual-to-physical
607 * table hash buckets.
609 * The number of buckets should be a power of two to
610 * get a good hash function. The following computation
611 * chooses the first power of two that is greater
612 * than the number of physical pages in the system.
615 if (vm_page_bucket_count
== 0) {
616 unsigned int npages
= pmap_free_pages();
618 vm_page_bucket_count
= 1;
619 while (vm_page_bucket_count
< npages
)
620 vm_page_bucket_count
<<= 1;
622 vm_page_bucket_lock_count
= (vm_page_bucket_count
+ BUCKETS_PER_LOCK
- 1) / BUCKETS_PER_LOCK
;
624 vm_page_hash_mask
= vm_page_bucket_count
- 1;
627 * Calculate object shift value for hashing algorithm:
628 * O = log2(sizeof(struct vm_object))
629 * B = log2(vm_page_bucket_count)
630 * hash shifts the object left by
633 size
= vm_page_bucket_count
;
634 for (log1
= 0; size
> 1; log1
++)
636 size
= sizeof(struct vm_object
);
637 for (log2
= 0; size
> 1; log2
++)
639 vm_page_hash_shift
= log1
/2 - log2
+ 1;
641 vm_page_bucket_hash
= 1 << ((log1
+ 1) >> 1); /* Get (ceiling of sqrt of table size) */
642 vm_page_bucket_hash
|= 1 << ((log1
+ 1) >> 2); /* Get (ceiling of quadroot of table size) */
643 vm_page_bucket_hash
|= 1;		/* Set bit and add 1 - always must be 1 to ensure unique series */
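	/*
	 * Worked example (not part of the original source, and assuming the
	 * preceding loop halves `size` each pass, i.e. the usual log2
	 * computation): if vm_page_bucket_count ends up as 4096 (2^12),
	 * log1 == 12, so
	 *
	 *	vm_page_bucket_hash = (1 << 6) | (1 << 3) | 1 = 0x49
	 *
	 * roughly the square root and fourth root of the table size, OR'ed
	 * with the low bit.
	 */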
645 if (vm_page_hash_mask
& vm_page_bucket_count
)
646 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
648 vm_page_buckets
= (vm_page_bucket_t
*)
649 pmap_steal_memory(vm_page_bucket_count
*
650 sizeof(vm_page_bucket_t
));
652 vm_page_bucket_locks
= (lck_spin_t
*)
653 pmap_steal_memory(vm_page_bucket_lock_count
*
656 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
657 register vm_page_bucket_t
*bucket
= &vm_page_buckets
[i
];
659 bucket
->pages
= VM_PAGE_NULL
;
660 #if MACH_PAGE_HASH_STATS
661 bucket
->cur_count
= 0;
662 bucket
->hi_count
= 0;
663 #endif /* MACH_PAGE_HASH_STATS */
666 for (i
= 0; i
< vm_page_bucket_lock_count
; i
++)
667 lck_spin_init(&vm_page_bucket_locks
[i
], &vm_page_lck_grp_bucket
, &vm_page_lck_attr
);
670 * Machine-dependent code allocates the resident page table.
671 * It uses vm_page_init to initialize the page frames.
672 * The code also returns to us the virtual space available
673 * to the kernel. We don't trust the pmap module
674 * to get the alignment right.
677 pmap_startup(&virtual_space_start
, &virtual_space_end
);
678 virtual_space_start
= round_page(virtual_space_start
);
679 virtual_space_end
= trunc_page(virtual_space_end
);
681 *startp
= virtual_space_start
;
682 *endp
= virtual_space_end
;
685 * Compute the initial "wire" count.
686 * Up until now, the pages which have been set aside are not under
687 * the VM system's control, so although they aren't explicitly
688 * wired, they nonetheless can't be moved. At this moment,
689 * all VM managed pages are "free", courtesy of pmap_startup.
691 assert((unsigned int) atop_64(max_mem
) == atop_64(max_mem
));
692 vm_page_wire_count
= ((unsigned int) atop_64(max_mem
)) - vm_page_free_count
- vm_lopage_free_count
; /* initial value */
693 vm_page_wire_count_initial
= vm_page_wire_count
;
694 vm_page_free_count_minimum
= vm_page_free_count
;
696 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
697 vm_page_free_count
, vm_page_wire_count
);
699 simple_lock_init(&vm_paging_lock
, 0);
702 #ifndef MACHINE_PAGES
704 * We implement pmap_steal_memory and pmap_startup with the help
705 * of two simpler functions, pmap_virtual_space and pmap_next_page.
712 vm_offset_t addr
, vaddr
;
	 * We round the size up to a multiple of the pointer size.
719 size
= (size
+ sizeof (void *) - 1) &~ (sizeof (void *) - 1);
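	/*
	 * Worked example (not part of the original source): with 8-byte
	 * pointers, a request of 13 bytes becomes (13 + 7) & ~7 == 16.
	 */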
722 * If this is the first call to pmap_steal_memory,
723 * we have to initialize ourself.
726 if (virtual_space_start
== virtual_space_end
) {
727 pmap_virtual_space(&virtual_space_start
, &virtual_space_end
);
730 * The initial values must be aligned properly, and
731 * we don't trust the pmap module to do it right.
734 virtual_space_start
= round_page(virtual_space_start
);
735 virtual_space_end
= trunc_page(virtual_space_end
);
739 * Allocate virtual memory for this request.
742 addr
= virtual_space_start
;
743 virtual_space_start
+= size
;
745 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
748 * Allocate and map physical pages to back new virtual pages.
751 for (vaddr
= round_page(addr
);
753 vaddr
+= PAGE_SIZE
) {
755 if (!pmap_next_page_hi(&phys_page
))
756 panic("pmap_steal_memory");
759 * XXX Logically, these mappings should be wired,
760 * but some pmap modules barf if they are.
762 #if defined(__LP64__)
763 pmap_pre_expand(kernel_pmap
, vaddr
);
766 pmap_enter(kernel_pmap
, vaddr
, phys_page
,
767 VM_PROT_READ
|VM_PROT_WRITE
, VM_PROT_NONE
,
768 VM_WIMG_USE_DEFAULT
, FALSE
);
770 * Account for newly stolen memory
772 vm_page_wire_count
++;
776 return (void *) addr
;
784 unsigned int i
, npages
, pages_initialized
, fill
, fillval
;
789 * We calculate how many page frames we will have
790 * and then allocate the page structures in one chunk.
793 tmpaddr
= (addr64_t
)pmap_free_pages() * (addr64_t
)PAGE_SIZE
; /* Get the amount of memory left */
794 tmpaddr
= tmpaddr
+ (addr64_t
)(round_page(virtual_space_start
) - virtual_space_start
); /* Account for any slop */
795 npages
= (unsigned int)(tmpaddr
/ (addr64_t
)(PAGE_SIZE
+ sizeof(*vm_pages
))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
797 vm_pages
= (vm_page_t
) pmap_steal_memory(npages
* sizeof *vm_pages
);
800 * Initialize the page frames.
802 for (i
= 0, pages_initialized
= 0; i
< npages
; i
++) {
803 if (!pmap_next_page(&phys_page
))
805 if (pages_initialized
== 0 || phys_page
< vm_page_lowest
)
806 vm_page_lowest
= phys_page
;
808 vm_page_init(&vm_pages
[i
], phys_page
, FALSE
);
812 vm_pages_count
= pages_initialized
;
815 * Check if we want to initialize pages to a known value
817 fill
= 0; /* Assume no fill */
818 if (PE_parse_boot_argn("fill", &fillval
, sizeof (fillval
))) fill
= 1; /* Set fill */
820 /* This slows down booting the DEBUG kernel, particularly on
821 * large memory systems, but is worthwhile in deterministically
822 * trapping uninitialized memory usage.
826 fillval
= 0xDEB8F177;
830 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval
);
831 // -debug code remove
832 if (2 == vm_himemory_mode
) {
833 // free low -> high so high is preferred
834 for (i
= 1; i
<= pages_initialized
; i
++) {
835 if(fill
) fillPage(vm_pages
[i
- 1].phys_page
, fillval
);	/* Fill the page with a known value if requested at boot */
836 vm_page_release(&vm_pages
[i
- 1]);
840 // debug code remove-
843 * Release pages in reverse order so that physical pages
844 * initially get allocated in ascending addresses. This keeps
845 * the devices (which must address physical memory) happy if
846 * they require several consecutive pages.
848 for (i
= pages_initialized
; i
> 0; i
--) {
849 if(fill
) fillPage(vm_pages
[i
- 1].phys_page
, fillval
);	/* Fill the page with a known value if requested at boot */
850 vm_page_release(&vm_pages
[i
- 1]);
855 vm_page_t xx
, xxo
, xxl
;
858 j
= 0; /* (BRINGUP) */
861 for( i
= 0; i
< vm_colors
; i
++ ) {
862 queue_iterate(&vm_page_queue_free
[i
],
865 pageq
) { /* BRINGUP */
867 if(j
> vm_page_free_count
) { /* (BRINGUP) */
868 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx
, xxl
);
871 l
= vm_page_free_count
- j
; /* (BRINGUP) */
872 k
= 0; /* (BRINGUP) */
874 if(((j
- 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j
, vm_page_free_count
);
876 for(xxo
= xx
->pageq
.next
; xxo
!= &vm_page_queue_free
[i
]; xxo
= xxo
->pageq
.next
) { /* (BRINGUP) */
878 if(k
> l
) panic("pmap_startup: too many in secondary check %d %d\n", k
, l
);
879 if((xx
->phys_page
& 0xFFFFFFFF) == (xxo
->phys_page
& 0xFFFFFFFF)) { /* (BRINGUP) */
880 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx
, xxo
);
888 if(j
!= vm_page_free_count
) { /* (BRINGUP) */
889 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j
, vm_page_free_count
);
896 * We have to re-align virtual_space_start,
897 * because pmap_steal_memory has been using it.
900 virtual_space_start
= round_page(virtual_space_start
);
902 *startp
= virtual_space_start
;
903 *endp
= virtual_space_end
;
905 #endif /* MACHINE_PAGES */
908 * Routine: vm_page_module_init
910 * Second initialization pass, to be done after
911 * the basic VM system is ready.
914 vm_page_module_init(void)
916 vm_page_zone
= zinit((vm_size_t
) sizeof(struct vm_page
),
917 0, PAGE_SIZE
, "vm pages");
920 zone_debug_disable(vm_page_zone
);
921 #endif /* ZONE_DEBUG */
923 zone_change(vm_page_zone
, Z_CALLERACCT
, FALSE
);
924 zone_change(vm_page_zone
, Z_EXPAND
, FALSE
);
925 zone_change(vm_page_zone
, Z_EXHAUST
, TRUE
);
926 zone_change(vm_page_zone
, Z_FOREIGN
, TRUE
);
927 zone_change(vm_page_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
929 * Adjust zone statistics to account for the real pages allocated
930 * in vm_page_create(). [Q: is this really what we want?]
932 vm_page_zone
->count
+= vm_page_pages
;
933 vm_page_zone
->sum_count
+= vm_page_pages
;
934 vm_page_zone
->cur_size
+= vm_page_pages
* vm_page_zone
->elem_size
;
938 * Routine: vm_page_create
940 * After the VM system is up, machine-dependent code
941 * may stumble across more physical memory. For example,
942 * memory that it was reserving for a frame buffer.
943 * vm_page_create turns this memory into available pages.
954 for (phys_page
= start
;
957 while ((m
= (vm_page_t
) vm_page_grab_fictitious_common(phys_page
))
959 vm_page_more_fictitious();
961 m
->fictitious
= FALSE
;
962 pmap_clear_noencrypt(phys_page
);
972 * Distributes the object/offset key pair among hash buckets.
974 * NOTE: The bucket count must be a power of 2
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
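
/*
 * Illustrative usage (mirrors vm_page_insert_internal() and vm_page_lookup()
 * below): the hash picks both the bucket and, via BUCKETS_PER_LOCK, the spin
 * lock that guards it.
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */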
982 * vm_page_insert: [ internal use only ]
984 * Inserts the given mem entry into the object/object-page
985 * table and object list.
987 * The object must be locked.
993 vm_object_offset_t offset
)
995 vm_page_insert_internal(mem
, object
, offset
, FALSE
, TRUE
, FALSE
);
999 vm_page_insert_internal(
1002 vm_object_offset_t offset
,
1003 boolean_t queues_lock_held
,
1004 boolean_t insert_in_hash
,
1005 boolean_t batch_pmap_op
)
1007 vm_page_bucket_t
*bucket
;
1008 lck_spin_t
*bucket_lock
;
1012 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1013 object
, offset
, mem
, 0,0);
1016 * we may not hold the page queue lock
1017 * so this check isn't safe to make
1022 if (object
== vm_submap_object
) {
1023 /* the vm_submap_object is only a placeholder for submaps */
1024 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset
);
1027 vm_object_lock_assert_exclusive(object
);
1029 lck_mtx_assert(&vm_page_queue_lock
,
1030 queues_lock_held
? LCK_MTX_ASSERT_OWNED
1031 : LCK_MTX_ASSERT_NOTOWNED
);
1034 if (insert_in_hash
== TRUE
) {
1036 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
1037 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1038 "already in (obj=%p,off=0x%llx)",
1039 mem
, object
, offset
, mem
->object
, mem
->offset
);
1041 assert(!object
->internal
|| offset
< object
->vo_size
);
1043 /* only insert "pageout" pages into "pageout" objects,
1044 * and normal pages into normal objects */
1045 assert(object
->pageout
== mem
->pageout
);
1047 assert(vm_page_lookup(object
, offset
) == VM_PAGE_NULL
);
1050 * Record the object/offset pair in this page
1053 mem
->object
= object
;
1054 mem
->offset
= offset
;
1057 * Insert it into the object_object/offset hash table
1059 hash_id
= vm_page_hash(object
, offset
);
1060 bucket
= &vm_page_buckets
[hash_id
];
1061 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1063 lck_spin_lock(bucket_lock
);
1065 mem
->next
= bucket
->pages
;
1066 bucket
->pages
= mem
;
1067 #if MACH_PAGE_HASH_STATS
1068 if (++bucket
->cur_count
> bucket
->hi_count
)
1069 bucket
->hi_count
= bucket
->cur_count
;
1070 #endif /* MACH_PAGE_HASH_STATS */
1072 lck_spin_unlock(bucket_lock
);
1076 unsigned int cache_attr
;
1078 cache_attr
= object
->wimg_bits
& VM_WIMG_MASK
;
1080 if (cache_attr
!= VM_WIMG_USE_DEFAULT
) {
1081 PMAP_SET_CACHE_ATTR(mem
, object
, cache_attr
, batch_pmap_op
);
1085 * Now link into the object's list of backed pages.
1088 VM_PAGE_INSERT(mem
, object
);
1092 * Show that the object has one more resident page.
1095 object
->resident_page_count
++;
1096 if (VM_PAGE_WIRED(mem
)) {
1097 object
->wired_page_count
++;
1099 assert(object
->resident_page_count
>= object
->wired_page_count
);
1101 assert(!mem
->reusable
);
1103 if (object
->purgable
== VM_PURGABLE_VOLATILE
) {
1104 if (VM_PAGE_WIRED(mem
)) {
1105 OSAddAtomic(1, &vm_page_purgeable_wired_count
);
1107 OSAddAtomic(1, &vm_page_purgeable_count
);
1109 } else if (object
->purgable
== VM_PURGABLE_EMPTY
&&
1112 * This page belongs to a purged VM object but hasn't
1113 * been purged (because it was "busy").
1114 * It's in the "throttled" queue and hence not
1115 * visible to vm_pageout_scan(). Move it to a pageable
1116 * queue, so that it can eventually be reclaimed, instead
1117 * of lingering in the "empty" object.
1119 if (queues_lock_held
== FALSE
)
1120 vm_page_lockspin_queues();
1121 vm_page_deactivate(mem
);
1122 if (queues_lock_held
== FALSE
)
1123 vm_page_unlock_queues();
1130 * Exactly like vm_page_insert, except that we first
1131 * remove any existing page at the given offset in object.
1133 * The object must be locked.
1137 register vm_page_t mem
,
1138 register vm_object_t object
,
1139 register vm_object_offset_t offset
)
1141 vm_page_bucket_t
*bucket
;
1142 vm_page_t found_m
= VM_PAGE_NULL
;
1143 lck_spin_t
*bucket_lock
;
1148 * we don't hold the page queue lock
1149 * so this check isn't safe to make
1153 vm_object_lock_assert_exclusive(object
);
1155 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
1156 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1157 "already in (obj=%p,off=0x%llx)",
1158 mem
, object
, offset
, mem
->object
, mem
->offset
);
1159 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_NOTOWNED
);
1162 * Record the object/offset pair in this page
1165 mem
->object
= object
;
1166 mem
->offset
= offset
;
1169 * Insert it into the object_object/offset hash table,
1170 * replacing any page that might have been there.
1173 hash_id
= vm_page_hash(object
, offset
);
1174 bucket
= &vm_page_buckets
[hash_id
];
1175 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1177 lck_spin_lock(bucket_lock
);
1179 if (bucket
->pages
) {
1180 vm_page_t
*mp
= &bucket
->pages
;
1184 if (m
->object
== object
&& m
->offset
== offset
) {
1186 * Remove old page from hash list
1194 } while ((m
= *mp
));
1196 mem
->next
= bucket
->pages
;
1198 mem
->next
= VM_PAGE_NULL
;
1201 * insert new page at head of hash list
1203 bucket
->pages
= mem
;
1205 lck_spin_unlock(bucket_lock
);
1209 * there was already a page at the specified
1210 * offset for this object... remove it from
1211 * the object and free it back to the free list
1213 vm_page_free_unlocked(found_m
, FALSE
);
1215 vm_page_insert_internal(mem
, object
, offset
, FALSE
, FALSE
, FALSE
);
1219 * vm_page_remove: [ internal use only ]
1221 * Removes the given mem entry from the object/offset-page
1222 * table and the object page list.
1224 * The object must be locked.
1230 boolean_t remove_from_hash
)
1232 vm_page_bucket_t
*bucket
;
1234 lck_spin_t
*bucket_lock
;
1238 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1239 mem
->object
, mem
->offset
,
1242 vm_object_lock_assert_exclusive(mem
->object
);
1243 assert(mem
->tabled
);
1244 assert(!mem
->cleaning
);
1245 assert(!mem
->laundry
);
1248 * we don't hold the page queue lock
1249 * so this check isn't safe to make
1253 if (remove_from_hash
== TRUE
) {
1255 * Remove from the object_object/offset hash table
1257 hash_id
= vm_page_hash(mem
->object
, mem
->offset
);
1258 bucket
= &vm_page_buckets
[hash_id
];
1259 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1261 lck_spin_lock(bucket_lock
);
1263 if ((this = bucket
->pages
) == mem
) {
1264 /* optimize for common case */
1266 bucket
->pages
= mem
->next
;
1270 for (prev
= &this->next
;
1271 (this = *prev
) != mem
;
1276 #if MACH_PAGE_HASH_STATS
1277 bucket
->cur_count
--;
1278 #endif /* MACH_PAGE_HASH_STATS */
1280 lck_spin_unlock(bucket_lock
);
1283 * Now remove from the object's list of backed pages.
1286 VM_PAGE_REMOVE(mem
);
1289 * And show that the object has one fewer resident
1293 assert(mem
->object
->resident_page_count
> 0);
1294 mem
->object
->resident_page_count
--;
1296 if (!mem
->object
->internal
&& (mem
->object
->objq
.next
|| mem
->object
->objq
.prev
)) {
1297 if (mem
->object
->resident_page_count
== 0)
1298 vm_object_cache_remove(mem
->object
);
1301 if (VM_PAGE_WIRED(mem
)) {
1302 assert(mem
->object
->wired_page_count
> 0);
1303 mem
->object
->wired_page_count
--;
1305 assert(mem
->object
->resident_page_count
>=
1306 mem
->object
->wired_page_count
);
1307 if (mem
->reusable
) {
1308 assert(mem
->object
->reusable_page_count
> 0);
1309 mem
->object
->reusable_page_count
--;
1310 assert(mem
->object
->reusable_page_count
<=
1311 mem
->object
->resident_page_count
);
1312 mem
->reusable
= FALSE
;
1313 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
1314 vm_page_stats_reusable
.reused_remove
++;
1315 } else if (mem
->object
->all_reusable
) {
1316 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
1317 vm_page_stats_reusable
.reused_remove
++;
1320 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
1321 if (VM_PAGE_WIRED(mem
)) {
1322 assert(vm_page_purgeable_wired_count
> 0);
1323 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
1325 assert(vm_page_purgeable_count
> 0);
1326 OSAddAtomic(-1, &vm_page_purgeable_count
);
1329 if (mem
->object
->set_cache_attr
== TRUE
)
1330 pmap_set_cache_attributes(mem
->phys_page
, 0);
1332 mem
->tabled
= FALSE
;
1333 mem
->object
= VM_OBJECT_NULL
;
1334 mem
->offset
= (vm_object_offset_t
) -1;
1341 * Returns the page associated with the object/offset
1342 * pair specified; if none is found, VM_PAGE_NULL is returned.
1344 * The object must be locked. No side effects.
1347 unsigned long vm_page_lookup_hint
= 0;
1348 unsigned long vm_page_lookup_hint_next
= 0;
1349 unsigned long vm_page_lookup_hint_prev
= 0;
1350 unsigned long vm_page_lookup_hint_miss
= 0;
1351 unsigned long vm_page_lookup_bucket_NULL
= 0;
1352 unsigned long vm_page_lookup_miss
= 0;
1358 vm_object_offset_t offset
)
1361 vm_page_bucket_t
*bucket
;
1363 lck_spin_t
*bucket_lock
;
1366 vm_object_lock_assert_held(object
);
1367 mem
= object
->memq_hint
;
1369 if (mem
!= VM_PAGE_NULL
) {
1370 assert(mem
->object
== object
);
1372 if (mem
->offset
== offset
) {
1373 vm_page_lookup_hint
++;
1376 qe
= queue_next(&mem
->listq
);
1378 if (! queue_end(&object
->memq
, qe
)) {
1379 vm_page_t next_page
;
1381 next_page
= (vm_page_t
) qe
;
1382 assert(next_page
->object
== object
);
1384 if (next_page
->offset
== offset
) {
1385 vm_page_lookup_hint_next
++;
1386 object
->memq_hint
= next_page
; /* new hint */
1390 qe
= queue_prev(&mem
->listq
);
1392 if (! queue_end(&object
->memq
, qe
)) {
1393 vm_page_t prev_page
;
1395 prev_page
= (vm_page_t
) qe
;
1396 assert(prev_page
->object
== object
);
1398 if (prev_page
->offset
== offset
) {
1399 vm_page_lookup_hint_prev
++;
1400 object
->memq_hint
= prev_page
; /* new hint */
1406 * Search the hash table for this object/offset pair
1408 hash_id
= vm_page_hash(object
, offset
);
1409 bucket
= &vm_page_buckets
[hash_id
];
	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
1419 if (bucket
->pages
== VM_PAGE_NULL
) {
1420 vm_page_lookup_bucket_NULL
++;
1422 return (VM_PAGE_NULL
);
1424 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1426 lck_spin_lock(bucket_lock
);
1428 for (mem
= bucket
->pages
; mem
!= VM_PAGE_NULL
; mem
= mem
->next
) {
1431 * we don't hold the page queue lock
1432 * so this check isn't safe to make
1436 if ((mem
->object
== object
) && (mem
->offset
== offset
))
1439 lck_spin_unlock(bucket_lock
);
1441 if (mem
!= VM_PAGE_NULL
) {
1442 if (object
->memq_hint
!= VM_PAGE_NULL
) {
1443 vm_page_lookup_hint_miss
++;
1445 assert(mem
->object
== object
);
1446 object
->memq_hint
= mem
;
1448 vm_page_lookup_miss
++;
1457 * Move the given memory entry from its
1458 * current object to the specified target object/offset.
1460 * The object must be locked.
1464 register vm_page_t mem
,
1465 register vm_object_t new_object
,
1466 vm_object_offset_t new_offset
,
1467 boolean_t encrypted_ok
)
1469 assert(mem
->object
!= new_object
);
1473 * The encryption key is based on the page's memory object
1474 * (aka "pager") and paging offset. Moving the page to
1475 * another VM object changes its "pager" and "paging_offset"
1476 * so it has to be decrypted first, or we would lose the key.
1478 * One exception is VM object collapsing, where we transfer pages
1479 * from one backing object to its parent object. This operation also
1480 * transfers the paging information, so the <pager,paging_offset> info
1481 * should remain consistent. The caller (vm_object_do_collapse())
1482 * sets "encrypted_ok" in this case.
1484 if (!encrypted_ok
&& mem
->encrypted
) {
1485 panic("vm_page_rename: page %p is encrypted\n", mem
);
1489 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1490 new_object
, new_offset
,
1494 * Changes to mem->object require the page lock because
1495 * the pageout daemon uses that lock to get the object.
1497 vm_page_lockspin_queues();
1499 vm_page_remove(mem
, TRUE
);
1500 vm_page_insert_internal(mem
, new_object
, new_offset
, TRUE
, TRUE
, FALSE
);
1502 vm_page_unlock_queues();
1508 * Initialize the fields in a new page.
1509 * This takes a structure with random values and initializes it
1510 * so that it can be given to vm_page_release or vm_page_insert.
1521 if ((phys_page
!= vm_page_fictitious_addr
) && (phys_page
!= vm_page_guard_addr
)) {
1522 if (!(pmap_valid_page(phys_page
))) {
1523 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page
);
1527 *mem
= vm_page_template
;
1528 mem
->phys_page
= phys_page
;
1531 * we're leaving this turned off for now... currently pages
1532 * come off the free list and are either immediately dirtied/referenced
1533 * due to zero-fill or COW faults, or are used to read or write files...
1534 * in the file I/O case, the UPL mechanism takes care of clearing
1535 * the state of the HW ref/mod bits in a somewhat fragile way.
1536 * Since we may change the way this works in the future (to toughen it up),
1537 * I'm leaving this as a reminder of where these bits could get cleared
1541 * make sure both the h/w referenced and modified bits are
1542 * clear at this point... we are especially dependent on
1543 * not finding a 'stale' h/w modified in a number of spots
1544 * once this page goes back into use
1546 pmap_clear_refmod(phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
1548 mem
->lopage
= lopage
;
1552 * vm_page_grab_fictitious:
1554 * Remove a fictitious page from the free list.
1555 * Returns VM_PAGE_NULL if there are no free pages.
1557 int c_vm_page_grab_fictitious
= 0;
1558 int c_vm_page_grab_fictitious_failed
= 0;
1559 int c_vm_page_release_fictitious
= 0;
1560 int c_vm_page_more_fictitious
= 0;
1563 vm_page_grab_fictitious_common(
1568 if ((m
= (vm_page_t
)zget(vm_page_zone
))) {
1570 vm_page_init(m
, phys_addr
, FALSE
);
1571 m
->fictitious
= TRUE
;
1573 c_vm_page_grab_fictitious
++;
1575 c_vm_page_grab_fictitious_failed
++;
1581 vm_page_grab_fictitious(void)
1583 return vm_page_grab_fictitious_common(vm_page_fictitious_addr
);
1587 vm_page_grab_guard(void)
1589 return vm_page_grab_fictitious_common(vm_page_guard_addr
);
1594 * vm_page_release_fictitious:
1596 * Release a fictitious page to the zone pool
1599 vm_page_release_fictitious(
1603 assert(m
->fictitious
);
1604 assert(m
->phys_page
== vm_page_fictitious_addr
||
1605 m
->phys_page
== vm_page_guard_addr
);
1607 c_vm_page_release_fictitious
++;
1609 zfree(vm_page_zone
, m
);
1613 * vm_page_more_fictitious:
1615 * Add more fictitious pages to the zone.
1616 * Allowed to block. This routine is way intimate
1617 * with the zones code, for several reasons:
1618 * 1. we need to carve some page structures out of physical
1619 * memory before zones work, so they _cannot_ come from
1621 * 2. the zone needs to be collectable in order to prevent
1622 * growth without bound. These structures are used by
1623 * the device pager (by the hundreds and thousands), as
1624 * private pages for pageout, and as blocking pages for
1625 * pagein. Temporary bursts in demand should not result in
1626 * permanent allocation of a resource.
1627 * 3. To smooth allocation humps, we allocate single pages
1628 * with kernel_memory_allocate(), and cram them into the
1632 void vm_page_more_fictitious(void)
1635 kern_return_t retval
;
1637 c_vm_page_more_fictitious
++;
1640 * Allocate a single page from the zone_map. Do not wait if no physical
1641 * pages are immediately available, and do not zero the space. We need
1642 * our own blocking lock here to prevent having multiple,
1643 * simultaneous requests from piling up on the zone_map lock. Exactly
1644 * one (of our) threads should be potentially waiting on the map lock.
1645 * If winner is not vm-privileged, then the page allocation will fail,
1646 * and it will temporarily block here in the vm_page_wait().
1648 lck_mtx_lock(&vm_page_alloc_lock
);
1650 * If another thread allocated space, just bail out now.
1652 if (zone_free_count(vm_page_zone
) > 5) {
1654 * The number "5" is a small number that is larger than the
1655 * number of fictitious pages that any single caller will
1656 * attempt to allocate. Otherwise, a thread will attempt to
1657 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1658 * release all of the resources and locks already acquired,
1659 * and then call this routine. This routine finds the pages
1660 * that the caller released, so fails to allocate new space.
1661 * The process repeats infinitely. The largest known number
1662 * of fictitious pages required in this manner is 2. 5 is
1663 * simply a somewhat larger number.
1665 lck_mtx_unlock(&vm_page_alloc_lock
);
1669 retval
= kernel_memory_allocate(zone_map
,
1670 &addr
, PAGE_SIZE
, VM_PROT_ALL
,
1671 KMA_KOBJECT
|KMA_NOPAGEWAIT
);
1672 if (retval
!= KERN_SUCCESS
) {
1674 * No page was available. Drop the
1675 * lock to give another thread a chance at it, and
1676 * wait for the pageout daemon to make progress.
1678 lck_mtx_unlock(&vm_page_alloc_lock
);
1679 vm_page_wait(THREAD_UNINT
);
1682 zcram(vm_page_zone
, addr
, PAGE_SIZE
);
1684 lck_mtx_unlock(&vm_page_alloc_lock
);
1691 * Return true if it is not likely that a non-vm_privileged thread
1692 * can get memory without blocking. Advisory only, since the
1693 * situation may change under us.
1698 /* No locking, at worst we will fib. */
1699 return( vm_page_free_count
<= vm_page_free_reserved
);
1705 * this is an interface to support bring-up of drivers
1706 * on platforms with physical memory > 4G...
1708 int vm_himemory_mode
= 0;
1712 * this interface exists to support hardware controllers
1713 * incapable of generating DMAs with more than 32 bits
1714 * of address on platforms with physical memory > 4G...
1716 unsigned int vm_lopages_allocated_q
= 0;
1717 unsigned int vm_lopages_allocated_cpm_success
= 0;
1718 unsigned int vm_lopages_allocated_cpm_failed
= 0;
1719 queue_head_t vm_lopage_queue_free
;
1722 vm_page_grablo(void)
1726 if (vm_lopage_needed
== FALSE
)
1727 return (vm_page_grab());
1729 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1731 if ( !queue_empty(&vm_lopage_queue_free
)) {
1732 queue_remove_first(&vm_lopage_queue_free
,
1736 assert(vm_lopage_free_count
);
1738 vm_lopage_free_count
--;
1739 vm_lopages_allocated_q
++;
1741 if (vm_lopage_free_count
< vm_lopage_lowater
)
1742 vm_lopage_refill
= TRUE
;
1744 lck_mtx_unlock(&vm_page_queue_free_lock
);
1746 lck_mtx_unlock(&vm_page_queue_free_lock
);
1748 if (cpm_allocate(PAGE_SIZE
, &mem
, atop(0xffffffff), 0, FALSE
, KMA_LOMEM
) != KERN_SUCCESS
) {
1750 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1751 vm_lopages_allocated_cpm_failed
++;
1752 lck_mtx_unlock(&vm_page_queue_free_lock
);
1754 return (VM_PAGE_NULL
);
1758 vm_page_lockspin_queues();
1760 mem
->gobbled
= FALSE
;
1761 vm_page_gobble_count
--;
1762 vm_page_wire_count
--;
1764 vm_lopages_allocated_cpm_success
++;
1765 vm_page_unlock_queues();
1769 assert(!mem
->pmapped
);
1770 assert(!mem
->wpmapped
);
1771 assert(!pmap_is_noencrypt(mem
->phys_page
));
1773 mem
->pageq
.next
= NULL
;
1774 mem
->pageq
.prev
= NULL
;
1783 * first try to grab a page from the per-cpu free list...
1784 * this must be done while pre-emption is disabled... if
1785 * a page is available, we're done...
1786 * if no page is available, grab the vm_page_queue_free_lock
1787 * and see if current number of free pages would allow us
1788 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1789 * if there are pages available, disable preemption and
1790 * recheck the state of the per-cpu free list... we could
1791 * have been preempted and moved to a different cpu, or
1792 * some other thread could have re-filled it... if still
1793 * empty, figure out how many pages we can steal from the
1794 * global free queue and move to the per-cpu queue...
1795 * return 1 of these pages when done... only wakeup the
1796 * pageout_scan thread if we moved pages from the global
1797 * list... no need for the wakeup if we've satisfied the
1798 * request from the per-cpu queue.
1801 #define COLOR_GROUPS_TO_STEAL 4
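
/*
 * Illustrative note (not part of the original source): when the per-cpu free
 * list is empty, vm_page_grab() refills it from the global color queues by
 * stealing whole color groups, clipped so the reserved pool is never touched:
 *
 *	pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
 *	if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
 *		pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
 */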
1805 vm_page_grab( void )
1810 disable_preemption();
1812 if ((mem
= PROCESSOR_DATA(current_processor(), free_pages
))) {
1813 return_page_from_cpu_list
:
1814 PROCESSOR_DATA(current_processor(), page_grab_count
) += 1;
1815 PROCESSOR_DATA(current_processor(), free_pages
) = mem
->pageq
.next
;
1816 mem
->pageq
.next
= NULL
;
1818 enable_preemption();
1820 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
1821 assert(mem
->tabled
== FALSE
);
1822 assert(mem
->object
== VM_OBJECT_NULL
);
1823 assert(!mem
->laundry
);
1825 assert(pmap_verify_free(mem
->phys_page
));
1827 assert(!mem
->encrypted
);
1828 assert(!mem
->pmapped
);
1829 assert(!mem
->wpmapped
);
1830 assert(!mem
->active
);
1831 assert(!mem
->inactive
);
1832 assert(!mem
->throttled
);
1833 assert(!mem
->speculative
);
1834 assert(!pmap_is_noencrypt(mem
->phys_page
));
1838 enable_preemption();
1842 * Optionally produce warnings if the wire or gobble
1843 * counts exceed some threshold.
1845 if (vm_page_wire_count_warning
> 0
1846 && vm_page_wire_count
>= vm_page_wire_count_warning
) {
1847 printf("mk: vm_page_grab(): high wired page count of %d\n",
1848 vm_page_wire_count
);
1849 assert(vm_page_wire_count
< vm_page_wire_count_warning
);
1851 if (vm_page_gobble_count_warning
> 0
1852 && vm_page_gobble_count
>= vm_page_gobble_count_warning
) {
1853 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1854 vm_page_gobble_count
);
1855 assert(vm_page_gobble_count
< vm_page_gobble_count_warning
);
1858 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1861 * Only let privileged threads (involved in pageout)
1862 * dip into the reserved pool.
1864 if ((vm_page_free_count
< vm_page_free_reserved
) &&
1865 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
1866 lck_mtx_unlock(&vm_page_queue_free_lock
);
1872 unsigned int pages_to_steal
;
1875 while ( vm_page_free_count
== 0 ) {
1877 lck_mtx_unlock(&vm_page_queue_free_lock
);
1879 * must be a privileged thread to be
1880 * in this state since a non-privileged
1881 * thread would have bailed if we were
1882 * under the vm_page_free_reserved mark
1885 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1888 disable_preemption();
1890 if ((mem
= PROCESSOR_DATA(current_processor(), free_pages
))) {
1891 lck_mtx_unlock(&vm_page_queue_free_lock
);
1894 * we got preempted and moved to another processor
1895 * or we got preempted and someone else ran and filled the cache
1897 goto return_page_from_cpu_list
;
1899 if (vm_page_free_count
<= vm_page_free_reserved
)
1902 pages_to_steal
= COLOR_GROUPS_TO_STEAL
* vm_colors
;
1904 if (pages_to_steal
> (vm_page_free_count
- vm_page_free_reserved
))
1905 pages_to_steal
= (vm_page_free_count
- vm_page_free_reserved
);
1907 color
= PROCESSOR_DATA(current_processor(), start_color
);
1910 while (pages_to_steal
--) {
1911 if (--vm_page_free_count
< vm_page_free_count_minimum
)
1912 vm_page_free_count_minimum
= vm_page_free_count
;
1914 while (queue_empty(&vm_page_queue_free
[color
]))
1915 color
= (color
+ 1) & vm_color_mask
;
1917 queue_remove_first(&vm_page_queue_free
[color
],
1921 mem
->pageq
.next
= NULL
;
1922 mem
->pageq
.prev
= NULL
;
1924 assert(!mem
->active
);
1925 assert(!mem
->inactive
);
1926 assert(!mem
->throttled
);
1927 assert(!mem
->speculative
);
1929 color
= (color
+ 1) & vm_color_mask
;
1934 tail
->pageq
.next
= (queue_t
)mem
;
1937 mem
->pageq
.prev
= NULL
;
1938 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
1939 assert(mem
->tabled
== FALSE
);
1940 assert(mem
->object
== VM_OBJECT_NULL
);
1941 assert(!mem
->laundry
);
1945 assert(pmap_verify_free(mem
->phys_page
));
1948 assert(!mem
->encrypted
);
1949 assert(!mem
->pmapped
);
1950 assert(!mem
->wpmapped
);
1951 assert(!pmap_is_noencrypt(mem
->phys_page
));
1953 PROCESSOR_DATA(current_processor(), free_pages
) = head
->pageq
.next
;
1954 PROCESSOR_DATA(current_processor(), start_color
) = color
;
1957 * satisfy this request
1959 PROCESSOR_DATA(current_processor(), page_grab_count
) += 1;
1961 mem
->pageq
.next
= NULL
;
1963 lck_mtx_unlock(&vm_page_queue_free_lock
);
1965 enable_preemption();
1968 * Decide if we should poke the pageout daemon.
1969 * We do this if the free count is less than the low
1970 * water mark, or if the free count is less than the high
1971 * water mark (but above the low water mark) and the inactive
1972 * count is less than its target.
1974 * We don't have the counts locked ... if they change a little,
1975 * it doesn't really matter.
1977 if ((vm_page_free_count
< vm_page_free_min
) ||
1978 ((vm_page_free_count
< vm_page_free_target
) &&
1979 ((vm_page_inactive_count
+ vm_page_speculative_count
) < vm_page_inactive_min
)))
1980 thread_wakeup((event_t
) &vm_page_free_wanted
);
1982 VM_CHECK_MEMORYSTATUS
;
1984 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1992 * Return a page to the free list.
1997 register vm_page_t mem
)
2000 int need_wakeup
= 0;
2001 int need_priv_wakeup
= 0;
2004 assert(!mem
->private && !mem
->fictitious
);
2005 if (vm_page_free_verify
) {
2006 assert(pmap_verify_free(mem
->phys_page
));
2008 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2010 pmap_clear_noencrypt(mem
->phys_page
);
2012 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2015 panic("vm_page_release");
2019 assert(!mem
->laundry
);
2020 assert(mem
->object
== VM_OBJECT_NULL
);
2021 assert(mem
->pageq
.next
== NULL
&&
2022 mem
->pageq
.prev
== NULL
);
2023 assert(mem
->listq
.next
== NULL
&&
2024 mem
->listq
.prev
== NULL
);
2026 if ((mem
->lopage
== TRUE
|| vm_lopage_refill
== TRUE
) &&
2027 vm_lopage_free_count
< vm_lopage_free_limit
&&
2028 mem
->phys_page
< max_valid_low_ppnum
) {
2030 * this exists to support hardware controllers
2031 * incapable of generating DMAs with more than 32 bits
2032 * of address on platforms with physical memory > 4G...
2034 queue_enter_first(&vm_lopage_queue_free
,
2038 vm_lopage_free_count
++;
2040 if (vm_lopage_free_count
>= vm_lopage_free_limit
)
2041 vm_lopage_refill
= FALSE
;
2045 mem
->lopage
= FALSE
;
2048 color
= mem
->phys_page
& vm_color_mask
;
2049 queue_enter_first(&vm_page_queue_free
[color
],
2053 vm_page_free_count
++;
	/*
	 *	Check if we should wake up someone waiting for page.
	 *	But don't bother waking them unless they can allocate.
	 *
	 *	We wakeup only one thread, to prevent starvation.
	 *	Because the scheduling system handles wait queues FIFO,
	 *	if we wakeup all waiting threads, one greedy thread
	 *	can starve multiple niceguy threads.  When the threads
	 *	all wakeup, the greedy thread runs first, grabs the page,
	 *	and waits for another page.  It will be the first to run
	 *	when the next page is freed.
	 *
	 *	However, there is a slight danger here.
	 *	The thread we wake might not use the free page.
	 *	Then the other threads could wait indefinitely
	 *	while the page goes unused.  To forestall this,
	 *	the pageout daemon will keep making free pages
	 *	as long as vm_page_free_wanted is non-zero.
	 */
2074 assert(vm_page_free_count
> 0);
2075 if (vm_page_free_wanted_privileged
> 0) {
2076 vm_page_free_wanted_privileged
--;
2077 need_priv_wakeup
= 1;
2078 } else if (vm_page_free_wanted
> 0 &&
2079 vm_page_free_count
> vm_page_free_reserved
) {
2080 vm_page_free_wanted
--;
2084 lck_mtx_unlock(&vm_page_queue_free_lock
);
2086 if (need_priv_wakeup
)
2087 thread_wakeup_one((event_t
) &vm_page_free_wanted_privileged
);
2088 else if (need_wakeup
)
2089 thread_wakeup_one((event_t
) &vm_page_free_count
);
2091 VM_CHECK_MEMORYSTATUS
;
2097 * Wait for a page to become available.
2098 * If there are plenty of free pages, then we don't sleep.
2101 * TRUE: There may be another page, try again
2102 * FALSE: We were interrupted out of our wait, don't try again
2110 * We can't use vm_page_free_reserved to make this
2111 * determination. Consider: some thread might
2112 * need to allocate two pages. The first allocation
2113 * succeeds, the second fails. After the first page is freed,
2114 * a call to vm_page_wait must really block.
2116 kern_return_t wait_result
;
2117 int need_wakeup
= 0;
2118 int is_privileged
= current_thread()->options
& TH_OPT_VMPRIV
;
2120 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2122 if (is_privileged
&& vm_page_free_count
) {
2123 lck_mtx_unlock(&vm_page_queue_free_lock
);
2126 if (vm_page_free_count
< vm_page_free_target
) {
2128 if (is_privileged
) {
2129 if (vm_page_free_wanted_privileged
++ == 0)
2131 wait_result
= assert_wait((event_t
)&vm_page_free_wanted_privileged
, interruptible
);
2133 if (vm_page_free_wanted
++ == 0)
2135 wait_result
= assert_wait((event_t
)&vm_page_free_count
, interruptible
);
2137 lck_mtx_unlock(&vm_page_queue_free_lock
);
2138 counter(c_vm_page_wait_block
++);
2141 thread_wakeup((event_t
)&vm_page_free_wanted
);
2143 if (wait_result
== THREAD_WAITING
)
2144 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
2146 return(wait_result
== THREAD_AWAKENED
);
2148 lck_mtx_unlock(&vm_page_queue_free_lock
);
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */

vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */

vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

counter(unsigned int c_laundry_pages_freed = 0;)
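
/*
 * Illustrative sketch only (not part of the original source): a hypothetical
 * helper showing how the allocation wrappers above are meant to be used,
 * i.e. with the object lock held exclusively around vm_page_alloc().
 */
#if 0
static vm_page_t
example_alloc_zeroed(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t	m;

	vm_object_lock(object);		/* vm_page_alloc() asserts the exclusive object lock */
	m = vm_page_alloc(object, offset);
	if (m != VM_PAGE_NULL)
		vm_page_zero_fill(m);	/* defined later in this file */
	vm_object_unlock(object);

	return m;
}
#endif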
/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}


void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}


/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}


void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.
 */
void
vm_page_free_list(
	vm_page_t	freeq,
	boolean_t	prepare_object)
{
	vm_page_t	mem;
	vm_page_t	nxt;
	vm_page_t	local_freeq;
	int		pg_count;
	int		color;

	while (freeq) {

		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {

			assert(!mem->inactive);
			assert(!mem->active);
			assert(!mem->throttled);
			assert(!mem->speculative);
			assert(!VM_PAGE_WIRED(mem));
			assert(mem->pageq.prev == NULL);

			nxt = (vm_page_t)(mem->pageq.next);

			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
				assert(pmap_verify_free(mem->phys_page));
			}
			if (prepare_object == TRUE)
				vm_page_free_prepare_object(mem, TRUE);

			if (!mem->fictitious) {

				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    mem->phys_page < max_valid_low_ppnum) {
					mem->pageq.next = NULL;
					vm_page_release(mem);
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock).  That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->pageq.next = (queue_entry_t)local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(mem->phys_page);
				}
			} else {
				assert(mem->phys_page == vm_page_fictitious_addr ||
				       mem->phys_page == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ( (mem = local_freeq) ) {
			unsigned int	avail_free_count;
			unsigned int	need_wakeup = 0;
			unsigned int	need_priv_wakeup = 0;

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {

				nxt = (vm_page_t)(mem->pageq.next);

				mem->free = TRUE;

				color = mem->phys_page & vm_color_mask;
				queue_enter_first(&vm_page_queue_free[color],
						  mem, vm_page_t, pageq);
				mem = nxt;
			}
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
					avail_free_count -= need_priv_wakeup;
				}
			}
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int	available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
			}
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				thread_wakeup((event_t) &vm_page_free_count);
			} else for (; need_wakeup != 0; need_wakeup--) {
				/*
				 * Wake up one waiter per page we just released.
				 */
				thread_wakeup_one((event_t) &vm_page_free_count);
			}

			VM_CHECK_MEMORYSTATUS;
		}
	}
}
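
/*
 * Illustrative sketch only (not part of the original source): the typical
 * caller-side pattern that feeds vm_page_free_list(), i.e. chaining
 * reclaimed pages through pageq.next and handing the whole chain over in
 * one call so the free-queue lock is taken in batches rather than per page.
 * This mirrors the local_free_q usage in vm_page_do_delayed_work() below.
 */
#if 0
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/* for each page that is ready to be reclaimed... */
	m->pageq.next = (queue_entry_t) local_free_q;
	local_free_q = m;

	/* ...then return the whole chain with a single call; the
	 * routine above batches them onto the colored free queues
	 * in chunks of 64 under the free-queue lock */
	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);
#endif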
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if ( !VM_PAGE_WIRED(mem)) {

		if (mem->pageout_queue) {
			mem->pageout = FALSE;
			vm_pageout_throttle_up(mem);
		}
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		VM_CHECK_MEMORYSTATUS;

		/*
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
}
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);

	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		assert(!mem->laundry);
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
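
/*
 * Illustrative sketch only (not part of the original source): a hypothetical
 * helper showing the locking that the wire/unwire pair above documents,
 * i.e. the page's object lock and the page queues lock held around each call.
 */
#if 0
static void
example_wire_for_io(vm_page_t m)
{
	vm_object_lock(m->object);
	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/* ... perform the I/O against m->phys_page ... */

	vm_object_lock(m->object);
	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);	/* TRUE: requeue on the paging queues */
	vm_page_unlock_queues();
	vm_object_unlock(m->object);
}
#endif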
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
	    m->dirty && m->object->internal &&
	    (m->object->purgable == VM_PURGABLE_DENY ||
	     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     m->object->purgable == VM_PURGABLE_VOLATILE)) {
		queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
		m->throttled = TRUE;
		vm_page_throttled_count++;
	} else {
		if (m->object->named && m->object->ref_count == 1) {
			vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
			vm_page_speculative_recreated++;
#endif
		} else {
			VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
		}
	}
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->clean_queue = TRUE;
	vm_page_cleaned_count++;

	m->inactive = TRUE;
	vm_page_inactive_count++;

	vm_pageout_enqueued_cleaned++;
}
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */

void
vm_page_activate(
	register vm_page_t	m)
{
#ifdef	FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious)
		return;

	if (m->active)
		panic("vm_page_activate: already active");

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {

		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
}
/*
 *	vm_page_speculate:
 *
 *	Put the specified page on the speculative list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
}
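
/*
 * Illustrative sketch only (not part of the original source): the deadline
 * arithmetic used when a speculative aging bin is (re)opened above, pulled
 * out into a hypothetical helper for clarity.
 */
#if 0
static void
example_age_q_deadline(mach_timespec_t *now, mach_timespec_t *deadline)
{
	/*
	 * With vm_page_speculative_q_age_ms == 500, for example, the bin's
	 * deadline becomes now + {0 sec, 500 * 1000 * NSEC_PER_USEC nsec},
	 * i.e. half a second after the bin was opened.
	 */
	deadline->tv_sec  = vm_page_speculative_q_age_ms / 1000;
	deadline->tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
	ADD_MACH_TIMESPEC(deadline, now);
}
#endif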
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t			t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}
void
vm_page_lru(
	vm_page_t	m)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			assert(m->throttled);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;

			m->throttled = FALSE;
			m->active = TRUE;
		}

		/*
		 * Transfer the entire throttled queue to a regular LRU page queue.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));

	vm_page_unlock_queues();
}
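
/*
 * Illustrative sketch only (not part of the original source): the O(1)
 * head splice performed above, written out for a plain queue_head_t so
 * the pointer surgery is easier to follow.  The helper name is hypothetical.
 */
#if 0
static void
example_splice_at_head(queue_head_t *dst, queue_head_t *src)
{
	queue_entry_t	first = queue_first(src);
	queue_entry_t	last  = queue_last(src);
	queue_entry_t	old_first;

	if (queue_empty(src))
		return;
	old_first = queue_first(dst);
	if (queue_empty(dst))
		queue_last(dst) = last;		/* dst was empty: src's tail becomes dst's tail */
	else
		queue_prev(old_first) = last;	/* link src's tail in front of dst's old head */
	queue_next(last) = old_first;
	queue_first(dst) = first;
	queue_prev(first) = (queue_entry_t) dst;
	queue_init(src);			/* src is now empty */
}
#endif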
/*
 * move pages from the indicated local queue to the global active queue
 * it's ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->local = FALSE;
			m->active = TRUE;
			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to a regular LRU page queue.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		lq->vpl_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
/*
 * vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{
	vm_page_t	tmp;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if (m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if ((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				  m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp, m);
	VM_PAGE_FREE(tmp);
#endif
}
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    m->object, m->offset, m, 0,0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(src_m->phys_page, src_pa,
			    dst_m->phys_page, dst_pa, len);
}
/*
 *	vm_page_copy:
 *
 *	Copy one page to another.
 *
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m->object, src_m->offset,
	    dest_m->object, dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m->object);

	/*
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;
		if (!was_busy) {
			src_m->busy = TRUE;
		}
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
3407 printf("vm_page %p: \n", p
);
3408 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3409 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3410 printf(" next=%p\n", p
->next
);
3411 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3412 printf(" wire_count=%u\n", p
->wire_count
);
3414 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3415 (p
->local
? "" : "!"),
3416 (p
->inactive
? "" : "!"),
3417 (p
->active
? "" : "!"),
3418 (p
->pageout_queue
? "" : "!"),
3419 (p
->speculative
? "" : "!"),
3420 (p
->laundry
? "" : "!"));
3421 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3422 (p
->free
? "" : "!"),
3423 (p
->reference
? "" : "!"),
3424 (p
->gobbled
? "" : "!"),
3425 (p
->private ? "" : "!"),
3426 (p
->throttled
? "" : "!"));
3427 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3428 (p
->busy
? "" : "!"),
3429 (p
->wanted
? "" : "!"),
3430 (p
->tabled
? "" : "!"),
3431 (p
->fictitious
? "" : "!"),
3432 (p
->pmapped
? "" : "!"),
3433 (p
->wpmapped
? "" : "!"));
3434 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3435 (p
->pageout
? "" : "!"),
3436 (p
->absent
? "" : "!"),
3437 (p
->error
? "" : "!"),
3438 (p
->dirty
? "" : "!"),
3439 (p
->cleaning
? "" : "!"),
3440 (p
->precious
? "" : "!"),
3441 (p
->clustered
? "" : "!"));
3442 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3443 (p
->overwriting
? "" : "!"),
3444 (p
->restart
? "" : "!"),
3445 (p
->unusual
? "" : "!"),
3446 (p
->encrypted
? "" : "!"),
3447 (p
->encrypted_cleaning
? "" : "!"));
3448 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3449 (p
->cs_validated
? "" : "!"),
3450 (p
->cs_tainted
? "" : "!"),
3451 (p
->no_cache
? "" : "!"));
3453 printf("phys_page=0x%x\n", p
->phys_page
);
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static boolean_t
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous: not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous: count error");
	}
	return 1;
}
/*
 *	Check the free lists for proper length etc.
 */
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m,
		      vm_page_t,
		      pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( !m->busy )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
			if ( !m->free )
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
				      color, npages, m);
		}
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}

static boolean_t vm_page_verify_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;

	if (! vm_page_verify_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists: "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	lck_mtx_unlock(&vm_page_queue_free_lock);
}

void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
		assert(!mem->pageout_queue);
	}
}
#endif	/* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 * This is done by traversing the vm_page_t array in a linear fashion
 * we assume that the vm_page_t array has the available physical pages in an
 * ordered, ascending list... this is currently true of all our implementations
 * and must remain so... there can be 'holes' in the array... we also can
 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 * which used to happen via 'vm_page_convert'... that function was no longer
 * being called and was removed...
 *
 * The basic flow consists of stabilizing some of the interesting state of
 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 * sweep at the beginning of the array looking for pages that meet our criteria
 * for a 'stealable' page... currently we are pretty conservative... if the page
 * meets this criteria and is physically contiguous to the previous page in the 'run'
 * we keep developing it.  If we hit a page that doesn't fit, we reset our state
 * and start to develop a new run... if at this point we've already considered
 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 * and mutex_pause (which will yield the processor), to keep the latency low w/r
 * to other threads trying to acquire free pages (or move pages from q to q),
 * and then continue from the spot we left off... we only make 1 pass through the
 * array.  Once we have a 'run' that is long enough, we'll go into the loop which
 * steals the pages from the queues they're currently on... pages on the free
 * queue can be stolen directly... pages that are on any of the other queues
 * must be removed from the object they are tabled on... this requires taking the
 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 * or if the state of the page behind the vm_object lock is no longer viable, we'll
 * dump the pages we've currently stolen back to the free list, and pick up our
 * scan from the point where we aborted the 'current' run.
 *
 *	- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
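
/*
 * Illustrative sketch only (not part of the original source): a simplified,
 * lock-free model of the run detection described above.  It operates on a
 * plain array of physical page numbers and ignores the stealability checks;
 * the helper name is hypothetical.
 */
#if 0
static int
example_find_contig_run(ppnum_t *pnum, unsigned int count, unsigned int want)
{
	unsigned int	idx, start = 0, npages = 0;
	ppnum_t		prevcontaddr = (ppnum_t) -2;

	for (idx = 0; idx < count && npages < want; idx++) {
		if (pnum[idx] != prevcontaddr + 1) {
			/* not physically adjacent: restart the run here */
			npages = 1;
			start = idx;
		} else {
			npages++;
		}
		prevcontaddr = pnum[idx];
	}
	/* index of the first page of the run, or -1 if no run was found */
	return (npages == want) ? (int) start : -1;
}
#endif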
static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum = 0;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
	int		yielded = 0;
	int		dumped_run = 0;
	int		stolen_pages = 0;

	if (contig_pages == 0)
		return VM_PAGE_NULL;

	vm_page_verify_free_lists();

	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);

	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	RESET_STATE_OF_RUN();

	scanned = 0;
	considered = 0;
	free_available = vm_page_free_count - vm_page_free_reserved;

	wrapped = FALSE;

	if (flags & KMA_LOMEM)
		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
	else
		idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

	orig_last_idx = idx_last_contig_page_found;
	last_idx = orig_last_idx;

	for (page_idx = last_idx, start_idx = last_idx;
	     npages < contig_pages && page_idx < vm_pages_count;
	     page_idx++) {
retry:
		if (wrapped &&
		    page_idx >= orig_last_idx) {
			/*
			 * We're back where we started and we haven't
			 * found any suitable contiguous range.  Let's
			 * give up.
			 */
			break;
		}
		scanned++;
		m = &vm_pages[page_idx];

		assert(!m->fictitious);
		assert(!m->private);

		if (max_pnum && m->phys_page > max_pnum) {
			/* no more low pages... */
			break;
		}
		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
			RESET_STATE_OF_RUN();

		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
			   m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
			   m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
			   m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
			/*
			 * page is in a transient state
			 * or a state we don't want to deal
			 * with, so don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
			/*
			 * page needs to be on one of our queues
			 * in order for it to be stable behind the
			 * locks we hold at this point...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && (!m->tabled || m->busy)) {
			/*
			 * pages on the free list are always 'busy'
			 * so we couldn't test for 'busy' in the check
			 * for the transient states... pages that are
			 * 'free' are never 'tabled', so we also couldn't
			 * test for 'tabled'.  So we check here to make
			 * sure that a non-free page is not busy and is
			 * tabled on an object...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else {
			if (m->phys_page != prevcontaddr + 1) {
				if ((m->phys_page & pnum_mask) != 0) {
					RESET_STATE_OF_RUN();
					goto did_consider;
				} else {
					npages = 1;
					start_idx = page_idx;
					start_pnum = m->phys_page;
				}
			} else {
				npages++;
			}
			prevcontaddr = m->phys_page;

			if (m->free) {
				free_considered++;
			} else {
				/*
				 * This page is not free.
				 * If we can't steal used pages,
				 * we have to give up this run.
				 *
				 * Otherwise, we might need to
				 * move the contents of this page
				 * into a substitute page.
				 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
				if (m->pmapped || m->dirty) {
					substitute_needed++;
				}
#else
				RESET_STATE_OF_RUN();
#endif
			}
			if ((free_considered + substitute_needed) > free_available) {
				/*
				 * if we let this run continue
				 * we will end up dropping the vm_page_free_count
				 * below the reserve limit... we need to abort
				 * this run, but we can at least re-consider this
				 * page... thus the jump back to 'retry'
				 */
				RESET_STATE_OF_RUN();

				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
					considered++;
					goto retry;
				}
				/*
				 * free_available == 0
				 * so can't consider any free pages... if
				 * we went to retry in this case, we'd
				 * get stuck looking at the same page
				 * w/o making any forward progress
				 * we also want to take this path if we've already
				 * reached our limit that controls the lock latency
				 */
			}
		}
did_consider:
		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			vm_page_unlock_queues();

			mutex_pause(0);

			vm_page_lock_queues();
			lck_mtx_lock(&vm_page_queue_free_lock);

			RESET_STATE_OF_RUN();
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			considered = 0;

			yielded++;

			goto retry;
		}
		considered++;
	}
	if (npages != contig_pages) {

		if (!wrapped) {
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();
			if (flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
			last_idx = 0;
			page_idx = last_idx;
			wrapped = TRUE;
			goto retry;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);

	} else {
		vm_page_t	m1;
		vm_page_t	m2;
		unsigned int	cur_idx;
		unsigned int	tmp_start_idx;
		vm_object_t	locked_object = VM_OBJECT_NULL;
		boolean_t	abort_run = FALSE;

		assert(page_idx - start_idx == contig_pages);

		tmp_start_idx = start_idx;

		/*
		 * first pass through to pull the free pages
		 * off of the free queue so that in case we
		 * need substitute pages, we won't grab any
		 * of the free pages in the run... we'll clear
		 * the 'free' bit in the 2nd pass, and even in
		 * an abort_run case, we'll collect all of the
		 * free pages in this run and return them to the free list
		 */
		while (start_idx < page_idx) {

			m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
			assert(m1->free);
#endif

			if (m1->free) {
				unsigned int color;

				color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
#endif
				queue_remove(&vm_page_queue_free[color],
					     m1,
					     vm_page_t,
					     pageq);
				m1->pageq.next = NULL;
				m1->pageq.prev = NULL;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
#endif
				/*
				 * Clear the "free" bit so that this page
				 * does not get considered for another
				 * concurrent physically-contiguous allocation.
				 */
				m1->free = FALSE;

				vm_page_free_count--;
			}
		}
		/*
		 * adjust global freelist counts
		 */
		if (vm_page_free_count < vm_page_free_count_minimum)
			vm_page_free_count_minimum = vm_page_free_count;

		if (flags & KMA_LOMEM)
			vm_page_lomem_find_contiguous_last_idx = page_idx;
		else
			vm_page_find_contiguous_last_idx = page_idx;

		/*
		 * we can drop the free queue lock at this point since
		 * we've pulled any 'free' candidates off of the list
		 * we need it dropped so that we can do a vm_page_grab
		 * when substituting for pmapped/dirty pages
		 */
		lck_mtx_unlock(&vm_page_queue_free_lock);

		start_idx = tmp_start_idx;
		cur_idx = page_idx - 1;

		while (start_idx++ < page_idx) {
			/*
			 * must go through the list from back to front
			 * so that the page list is created in the
			 * correct order - low -> high phys addresses
			 */
			m1 = &vm_pages[cur_idx--];

			if (m1->object == VM_OBJECT_NULL) {
				/*
				 * page has already been removed from
				 * the free list in the 1st pass
				 */
				assert(m1->offset == (vm_object_offset_t) -1);
				assert(!m1->wanted);
				assert(!m1->laundry);
			} else {
				vm_object_t object;

				if (abort_run == TRUE)
					continue;

				object = m1->object;

				if (object != locked_object) {
					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					if (vm_object_lock_try(object))
						locked_object = object;
				}
				if (locked_object == VM_OBJECT_NULL ||
				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
				     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
				     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
				     m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {

					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					tmp_start_idx = cur_idx;
					abort_run = TRUE;
					continue;
				}
				if (m1->pmapped || m1->dirty) {
					int			refmod;
					vm_object_offset_t	offset;

					m2 = vm_page_grab();

					if (m2 == VM_PAGE_NULL) {
						if (locked_object) {
							vm_object_unlock(locked_object);
							locked_object = VM_OBJECT_NULL;
						}
						tmp_start_idx = cur_idx;
						abort_run = TRUE;
						continue;
					}
					if (m1->pmapped)
						refmod = pmap_disconnect(m1->phys_page);
					else
						refmod = 0;
					vm_page_copy(m1, m2);

					m2->reference = m1->reference;
					m2->dirty     = m1->dirty;

					if (refmod & VM_MEM_REFERENCED)
						m2->reference = TRUE;
					if (refmod & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(m2, TRUE);
					}
					offset = m1->offset;

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);

					/*
					 * make sure we clear the ref/mod state
					 * from the pmap layer... else we risk
					 * inheriting state from the last time
					 * this page was used...
					 */
					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
					/*
					 * now put the substitute page on the object
					 */
					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);

					if (m2->reference)
						vm_page_activate(m2);
					else
						vm_page_deactivate(m2);

					PAGE_WAKEUP_DONE(m2);

				} else {
					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);
				}
				stolen_pages++;
			}
			m1->pageq.next = (queue_entry_t) m;
			m1->pageq.prev = NULL;
			m = m1;
		}
		if (locked_object) {
			vm_object_unlock(locked_object);
			locked_object = VM_OBJECT_NULL;
		}

		if (abort_run == TRUE) {
			if (m != VM_PAGE_NULL) {
				vm_page_free_list(m, FALSE);
			}
			dumped_run++;

			/*
			 * want the index of the last
			 * page in this run that was
			 * successfully 'stolen', so back
			 * it up 1 for the auto-decrement on use
			 * and 1 more to bump back over this page
			 */
			page_idx = tmp_start_idx + 2;
			if (page_idx >= vm_pages_count) {
				if (wrapped)
					goto done_scanning;
				page_idx = last_idx = 0;
				wrapped = TRUE;
			}
			abort_run = FALSE;

			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();

			if (flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

			last_idx = page_idx;

			lck_mtx_lock(&vm_page_queue_free_lock);
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			goto retry;
		}

		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {

			if (wire == TRUE)
				m1->wire_count++;
			else
				m1->gobbled = TRUE;
		}
		if (wire == FALSE)
			vm_page_gobble_count += npages;

		/*
		 * gobbled pages are also counted as wired pages
		 */
		vm_page_wire_count += npages;

		assert(vm_page_verify_contiguous(m, npages));
	}
done_scanning:
	vm_page_unlock_queues();

	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

	tv_end_sec -= tv_start_sec;
	if (tv_end_usec < tv_start_usec) {
		tv_end_sec--;
		tv_end_usec += 1000000;
	}
	tv_end_usec -= tv_start_usec;
	if (tv_end_usec >= 1000000) {
		tv_end_sec++;
		tv_end_usec -= 1000000;
	}
	if (vm_page_find_contig_debug) {
		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
		       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
		       (long)tv_end_sec, tv_end_usec, orig_last_idx,
		       scanned, yielded, dumped_run, stolen_pages);
	}

	vm_page_verify_free_lists();

	return m;
}
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	pages;
	unsigned int	npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;
	return KERN_SUCCESS;
}
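
/*
 * Illustrative sketch only (not part of the original source): a hypothetical
 * caller of cpm_allocate().  It assumes no low-memory or alignment
 * constraint (max_pnum = 0, pnum_mask = 0) and asks for wired pages.
 */
#if 0
static kern_return_t
example_grab_contiguous(vm_size_t bytes, vm_page_t *page_list)
{
	/* size must be a whole number of pages */
	return cpm_allocate(round_page(bytes), page_list, 0, 0, TRUE, 0);
}
#endif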
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * calling code
 */
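
/*
 * Illustrative sketch only (not part of the original source): the general
 * shape of a caller of vm_page_do_delayed_work().  The batch-limit handling
 * is simplified, and the dw_m member name is an assumption about the layout
 * of struct vm_page_delayed_work.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;

	/* pass 1: under the object lock only, record what each page needs */
	dwp->dw_m = m;				/* assumed member name */
	dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
	dwp++;
	dw_count++;

	/* pass 2: once the batch is full (or the run ends), take the
	 * page-queues lock once and apply everything */
	if (dw_count >= vm_max_delayed_work_limit) {
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
		dwp = &dw_array[0];
		dw_count = 0;
	}
#endif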
void
vm_page_do_delayed_work(
	vm_object_t	object,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);

		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
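
/*
 * Illustrative sketch only (not part of the original source): a hypothetical
 * caller that allocates a batch of low pages with vm_page_alloc_list() and
 * walks the chain using the accessors above.
 */
#if 0
static void
example_walk_lopage_list(void)
{
	vm_page_t	list, p;

	if (vm_page_alloc_list(8, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	/* the list is chained through pageq.next; the accessors let callers
	 * outside this file walk it without knowing the vm_page_t layout */
	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
		printf("low page at pfn 0x%x\n", vm_page_get_phys_page(p));

	vm_page_free_list(list, FALSE);
}
#endif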
4477 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(void);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);


struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_found_precious;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_vm_page_wire_count;
} hibernate_stats;
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while (q->pgo_laundry) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT) {
			hibernate_stats.hibernate_drain_timeout++;
			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct vm_pageout_queue	*iq;
	struct vm_pageout_queue	*eq;
	struct vm_pageout_queue	*tq;

	hibernate_cleaning_in_progress = TRUE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			}
			l_object = m_object;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if ( !m_object->pager_initialized && m_object->pager_created)
			goto reenter_pg_on_q;

		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			tq->pgo_throttled = TRUE;

			while (retval == 0) {

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (hibernate_should_abort())
					retval = 1;

				if (wait_result != THREAD_TIMED_OUT)
					break;

				if (--wait_count == 0) {
					hibernate_stats.hibernate_throttle_timeout++;
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	hibernate_cleaning_in_progress = FALSE;

	return (retval);
}
static int
hibernate_flush_dirty_pages()
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (hibernate_flush_queue(&aq->age_q, qcount))
			return (1);
	}
	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);

	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);
	return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
}
extern void IOSleep(unsigned int);
extern int sync_internal(void);

int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);

	if ((retval = hibernate_flush_dirty_pages()) == 0) {
		if (consider_buffer_cache_collect != NULL) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t		bank;
	hibernate_bitmap_t *	bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t	last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
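
/*
 * Illustrative sketch (not part of the original source): the tail-word
 * masking in hibernate_page_list_zero() marks the bit positions beyond the
 * last real page of a bank as "does not need saving", so the image writer
 * never tries to save pages that don't exist.  The toy below reproduces the
 * arithmetic for a bank whose page count is not a multiple of 32; the
 * first_page/last_page values are made up for the example.
 */
#if 0	/* example only; never compiled into the kernel */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint32_t first_page = 0x1000;
    uint32_t last_page  = 0x1045;	/* 70 pages in this toy bank */
    uint32_t last_bit   = ((last_page - first_page + 1) & 31);
    uint32_t tail_word  = 0;

    if (last_bit)
        tail_word = (0xFFFFFFFF >> last_bit);

    /* 70 & 31 == 6, so 6 in-range positions stay zero ("save") and the
     * remaining 26 positions of the last word are set ("don't save"). */
    printf("last_bit = %u, tail word = 0x%08X\n",
           (unsigned int) last_bit, (unsigned int) tail_word);
    return 0;
}
#endif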
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t	i;
	vm_page_t	m;
	uint64_t	start, end, timeout, nsec;

	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}
void
hibernate_free_gobble_pages(void)
{
	vm_page_t	m, next;
	uint32_t	count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
static boolean_t
hibernate_consider_discard(vm_page_t m)
{
	vm_object_t	object = NULL;
	int		refmod_state;
	boolean_t	discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);

		if (discard == FALSE)
			hibernate_stats.cd_found_dirty++;
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
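
/*
 * Illustrative sketch (not part of the original source): the final test in
 * hibernate_consider_discard() boils down to "discard if the page is clean,
 * or if its object is volatile or empty purgeable memory".  The toy below
 * restates that predicate as a pure function; the enum and names are
 * hypothetical stand-ins for the kernel's purgable states.
 */
#if 0	/* example only; never compiled into the kernel */
#include <stdio.h>
#include <stdbool.h>

enum toy_purgable { TOY_NONVOLATILE, TOY_VOLATILE, TOY_EMPTY };

/* A page can be dropped from the hibernation image if it is clean, or if
 * its owner already agreed the contents may vanish (volatile/empty). */
static bool
toy_can_discard(bool dirty, enum toy_purgable purgable)
{
    return !dirty || purgable == TOY_VOLATILE || purgable == TOY_EMPTY;
}

int
main(void)
{
    printf("clean, nonvolatile : %d\n", toy_can_discard(false, TOY_NONVOLATILE));
    printf("dirty, nonvolatile : %d\n", toy_can_discard(true,  TOY_NONVOLATILE));
    printf("dirty, volatile    : %d\n", toy_can_discard(true,  TOY_VOLATILE));
    return 0;
}
#endif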
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) !vm_object_lock_try", m);

	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		vm_purgeable_token_delete_first(old_queue);
		m->object->purgable = VM_PURGABLE_EMPTY;
	}

	vm_page_free(m);

	vm_object_unlock(object);
}
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/
void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   uint32_t * pagesOut)
{
	uint64_t	start, end, nsec;
	vm_page_t	m;
	uint32_t	pages = page_list->page_count;
	uint32_t	count_zf = 0, count_throttled = 0;
	uint32_t	count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
	uint32_t	count_wire = pages;
	uint32_t	count_discard_active = 0;
	uint32_t	count_discard_inactive = 0;
	uint32_t	count_discard_cleaned = 0;
	uint32_t	count_discard_purgeable = 0;
	uint32_t	count_discard_speculative = 0;
	uint32_t	i;
	uint32_t	bank;
	hibernate_bitmap_t *	bitmap;
	hibernate_bitmap_t *	bitmap_wired;

	HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);

	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	hibernate_page_list_zero(page_list);
	hibernate_page_list_zero(page_list_wired);
	hibernate_page_list_zero(page_list_pal);

	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
	hibernate_stats.cd_pages = pages;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, TRUE);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list, TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		m = (vm_page_t) m->pageq.next;
	}

	for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list, TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		hibernate_stats.cd_total_free++;
	}

	queue_iterate( &vm_page_queue_throttled,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
		}
		else
			count_throttled++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_anonymous,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_zf++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_inactive,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
		}
		else
			count_inactive++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	queue_iterate( &vm_page_queue_cleaned,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
		}
		else
			count_cleaned++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		queue_iterate(&vm_page_queue_speculative[i].age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m))
			{
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
			}
			else
				count_speculative++;
			count_wire--;
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
	}

	queue_iterate( &vm_page_queue_active,
		       m,
		       vm_page_t,
		       pageq )
	{
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m))
		{
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
		}
		else
			count_active++;
		count_wire--;
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	// pull wired from hibernate_bitmap

	bitmap = &page_list->bank_bitmap[0];
	bitmap_wired = &page_list_wired->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
		for (i = 0; i < bitmap->bitmapwords; i++)
			bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
		bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);

	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
	       pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t	start, end, nsec;
	vm_page_t	m;
	vm_page_t	next;
	uint32_t	i;
	uint32_t	count_discard_active = 0;
	uint32_t	count_discard_inactive = 0;
	uint32_t	count_discard_purgeable = 0;
	uint32_t	count_discard_cleaned = 0;
	uint32_t	count_discard_speculative = 0;

	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
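
/*
 * Illustrative sketch (not part of the original source): every traversal in
 * hibernate_page_list_discard() captures next = m->pageq.next *before*
 * hibernate_discard_page(m) may free the page, so the walk never touches a
 * freed element.  The toy below shows the same capture-before-free pattern
 * on a hypothetical heap-allocated list.
 */
#if 0	/* example only; never compiled into the kernel */
#include <stdio.h>
#include <stdlib.h>

struct toy_node {
    struct toy_node *next;
    int              discard;	/* stands in for hibernate_page_bittst() */
};

/* Grab the next link before the current element can be freed. */
static void
toy_discard_walk(struct toy_node *m)
{
    while (m != NULL) {
        struct toy_node *next = m->next;	/* capture first */
        if (m->discard)
            free(m);				/* never touch m again */
        m = next;
    }
}

int
main(void)
{
    /* Build a three-node list: keep, discard, keep. */
    struct toy_node *n3 = malloc(sizeof(*n3));
    struct toy_node *n2 = malloc(sizeof(*n2));
    struct toy_node *n1 = malloc(sizeof(*n1));

    *n3 = (struct toy_node){ NULL, 0 };
    *n2 = (struct toy_node){ n3,   1 };
    *n1 = (struct toy_node){ n2,   0 };

    toy_discard_walk(n1);
    printf("walk finished without touching freed nodes\n");

    /* In the kernel, hibernate_discard_page() also unlinks the page from its
     * queue so no dangling links remain; this toy skips that and simply lets
     * the kept nodes leak at exit. */
    return 0;
}
#endif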
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t	*info,
	unsigned int		count)
{
	unsigned int	i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int	 bucket_count = 0;
		vm_page_t	 m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
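
/*
 * Illustrative sketch (not part of the original source): vm_page_info()
 * computes each bucket's count while holding that bucket's spinlock and only
 * writes into the caller's (possibly pageable) buffer after the lock has
 * been dropped.  The user-space toy below mirrors that count-locally,
 * store-after-unlock pattern with pthread mutexes; all toy_* names are
 * hypothetical.
 */
#if 0	/* example only; never compiled into the kernel */
#include <stdio.h>
#include <pthread.h>

#define TOY_BUCKETS	4

struct toy_bucket {
    int		    count;	/* stands in for the chained page list */
    pthread_mutex_t lock;
};

static struct toy_bucket toy_table[TOY_BUCKETS];

/* Take the bucket lock, read into a local, drop the lock, then store into
 * the caller's buffer -- never touch the output while the lock is held. */
static void
toy_table_info(unsigned int *info, unsigned int count)
{
    for (unsigned int i = 0; i < count && i < TOY_BUCKETS; i++) {
        unsigned int bucket_count;

        pthread_mutex_lock(&toy_table[i].lock);
        bucket_count = toy_table[i].count;
        pthread_mutex_unlock(&toy_table[i].lock);

        info[i] = bucket_count;		/* safe: no lock held here */
    }
}

int
main(void)
{
    unsigned int info[TOY_BUCKETS];

    for (int i = 0; i < TOY_BUCKETS; i++) {
        toy_table[i].count = i * 2;
        pthread_mutex_init(&toy_table[i].lock, NULL);
    }
    toy_table_info(info, TOY_BUCKETS);
    for (int i = 0; i < TOY_BUCKETS; i++)
        printf("bucket %d: %u\n", i, info[i]);
    return 0;
}
#endif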