/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater    = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t	page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int	io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0;	/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;	/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;


/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
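
/*
 * Illustrative example (not part of the original source): with the common
 * 4 KB page size, the assignments above leave page_size == 4096,
 * page_mask == 0xFFF and page_shift == 12, so "addr & ~page_mask" rounds an
 * address down to its page boundary and "addr >> page_shift" yields its page
 * number.  The concrete values are machine-dependent; 4 KB is only an
 * assumption for this example.
 */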
#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n == 0 )
		n = 1;
	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}
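
/*
 * Illustrative note (not part of the original source): because vm_colors is
 * forced to a power of two above, cycling through the per-color free queues
 * can use a mask instead of a modulo, e.g.
 *
 *	color = (color + 1) & vm_color_mask;
 *
 * which wraps from vm_colors-1 back to 0 and is the idiom vm_page_grab()
 * uses below when refilling the per-cpu magazine from the global free queues.
 */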
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

	vm_compressor_init_locks();
}
void
vm_page_init_local_q()
{
	unsigned int		num_cpus;
	unsigned int		i;
	struct vplq		*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
			lq->vpl_internal_count = 0;
			lq->vpl_external_count = 0;
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;		/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->compressor = FALSE;
	m->written_by_kernel = FALSE;
	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
#endif
	}
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_cleaned);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_anonymous);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	kernel_debug_string("zone_steal_memory");
	zone_steal_memory();
	kernel_debug_string("vm_map_steal_memory");
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;
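
	/*
	 * Illustrative example (not part of the original source): on a
	 * machine with roughly 1,000,000 free physical pages, the loop above
	 * picks vm_page_bucket_count == 1,048,576 (2^20), so
	 * vm_page_hash_mask becomes 0xFFFFF and vm_page_bucket_lock_count
	 * becomes 1,048,576 / 16 == 65,536, i.e. one spin lock for every
	 * BUCKETS_PER_LOCK consecutive buckets.  The numbers scale with the
	 * actual memory size and are only a sketch.
	 */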
	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *		hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to insure unique series */
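
	/*
	 * Illustrative example (not part of the original source): with
	 * vm_page_bucket_count == 2^20 (log1 == 20) and a hypothetical
	 * 256-byte struct vm_object (log2 == 8), the lines above produce
	 * vm_page_hash_shift == 20/2 - 8 + 1 == 3 and
	 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
	 * Both inputs vary by machine and kernel version, so these values
	 * are only an assumption for the example.
	 */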
	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");

#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
		vm_map_round_page((vm_page_fake_buckets_start +
				   (vm_page_bucket_count *
				    sizeof (vm_page_bucket_t))),
				  PAGE_MASK);
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	     cp < (char *)vm_page_fake_buckets_end;
	     cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

	kernel_debug_string("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	kernel_debug_string("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	kernel_debug_string("vm_page_bootstrap complete");
	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t addr, vaddr;
	ppnum_t	phys_page;

	/*
	 *	We round the size to a round multiple.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}
void vm_page_release_startup(vm_page_t mem);

void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

#if    defined(__LP64__)
	/*
	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
	 */
	assert(sizeof(struct vm_page) == 64);

	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	kernel_debug_string("Initialize the page frames");
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if    defined(__LP64__)

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
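
	/*
	 * Illustrative note (not part of the original source): VM_PAGE_PACK_PTR
	 * depends on every struct vm_page being 64 bytes and 64-byte aligned,
	 * so a page pointer can be stored compactly by dropping its six
	 * always-zero low-order bits, conceptually (with "base" standing in
	 * for the packing base address the real macros use):
	 *
	 *	packed   = (vm_page_packed_t)(((uintptr_t)p - (uintptr_t)base) >> 6);
	 *	unpacked = (vm_page_t)((uintptr_t)base + ((uintptr_t)packed << 6));
	 *
	 * The round-trip checks above verify that the first and last entries
	 * of vm_pages survive this encoding.
	 */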
	kernel_debug_string("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */

#if	DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif

	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;

#if 0
	{
		vm_page_t xx, xxo, xxl;
		int i, j, k, l;

		j = 0;							/* (BRINGUP) */
		xxl = 0;

		for( i = 0; i < vm_colors; i++ ) {
			queue_iterate(&vm_page_queue_free[i],
				      xx,
				      vm_page_t,
				      pageq) {	/* BRINGUP */
				j++;					/* (BRINGUP) */
				if(j > vm_page_free_count) {		/* (BRINGUP) */
					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
				}

				l = vm_page_free_count - j;		/* (BRINGUP) */
				k = 0;					/* (BRINGUP) */

				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
					k++;
					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
					}
				}

				xxl = xx;
			}
		}

		if(j != vm_page_free_count) {				/* (BRINGUP) */
			panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
		}
	}
#endif

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		m->fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
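
/*
 * Illustrative sketch (not part of the original source): how the routines
 * below typically go from an (object, offset) pair to a hash bucket and the
 * spin lock that covers it ("object" and "offset" are stand-ins for a locked
 * VM object and a page-aligned offset):
 *
 *	int               hash_id     = vm_page_hash(object, offset);
 *	vm_page_bucket_t *bucket      = &vm_page_buckets[hash_id];
 *	lck_spin_t       *bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * Because vm_page_bucket_count is a power of 2, masking with
 * vm_page_hash_mask keeps hash_id within the bucket array.
 */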
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash,
	boolean_t		batch_pmap_op)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;
	task_t			owner;

	XPR(XPR_VM_PAGE,
	        "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	        object, offset, mem, 0,0);
#if 0
	/*
	 * we may not hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif

	assert(page_aligned(offset));

	/* the vm_submap_object is only a placeholder for submaps */
	assert(object != vm_submap_object);

	vm_object_lock_assert_exclusive(object);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
		       			: LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

	if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);
#endif
		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next_m = bucket->page_list;
		bucket->page_list = VM_PAGE_PACK_PTR(mem);
		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if     MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = TRUE;
		lck_spin_unlock(bucket_lock);
	}

	{
		unsigned int    cache_attr;

		cache_attr = object->wimg_bits & VM_WIMG_MASK;

		if (cache_attr != VM_WIMG_USE_DEFAULT) {
			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
		}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	if (object->internal) {
		OSAddAtomic(1, &vm_page_internal_count);
	} else {
		OSAddAtomic(1, &vm_page_external_count);
	}

	/*
	 * It wouldn't make sense to insert a "reusable" page in
	 * an object (the page would have been marked "reusable" only
	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
	 * in the object at that time).
	 * But a page could be inserted in a "all_reusable" object, if
	 * something faults it in (a vm_read() from another task or a
	 * "use-after-free" issue in user space, for example). It can
	 * also happen if we're relocating a page from that object to
	 * a different physical page during a physically-contiguous
	 * allocation.
	 */
	assert(!mem->reusable);
	if (mem->object->all_reusable) {
		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
	}

	if (object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = object->vo_purgeable_owner;
	}
	if (owner &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* more non-volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_nonvolatile,
			      PAGE_SIZE);
		/* more footprint */
		ledger_credit(owner->ledger,
			      task_ledgers.phys_footprint,
			      PAGE_SIZE);

	} else if (owner &&
		   (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* more volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_volatile,
			      PAGE_SIZE);
	}

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(+1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}

#if VM_OBJECT_TRACKING_OP_MODIFIED
	if (vm_object_tracking_inited &&
	    object->internal &&
	    object->resident_page_count == 0 &&
	    object->pager == NULL &&
	    object->shadow != NULL &&
	    object->shadow->copy == object) {
		void	*bt[VM_OBJECT_TRACKING_BTDEPTH];
		int	numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
				object,
				VM_OBJECT_TRACKING_OP_MODIFIED,
				bt,
				numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t	*bucket;
	vm_page_t		found_m = VM_PAGE_NULL;
	lck_spin_t		*bucket_lock;
	int			hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;

	XPR(XPR_VM_PAGE,
	        "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	        mem->object, mem->offset,
		mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			for (prev = &this->next_m;
			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (mem->object->internal) {
		assert(vm_page_internal_count);
		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = mem->object->vo_purgeable_owner;
	}
	if (owner &&
	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t		mem;
	vm_page_bucket_t	*bucket;
	queue_entry_t		qe;
	lck_spin_t		*bucket_lock;
	int			hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */
		VM_PAGE_CHECK(mem);
#endif
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else {
		vm_page_lookup_miss++;
	}

	return(mem);
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	boolean_t	internal_to_external, external_to_internal;

	assert(mem->object != new_object);

	/*
	 * ENCRYPTED SWAP:
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
		"vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
		new_object, new_offset,
		mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	if (mem->local) {
		/*
		 * it's much easier to get the vm_page_pageable_xxx accounting correct
		 * if we first move the page to the active queue... it's going to end
		 * up there anyway, and we don't do vm_page_rename's frequently enough
		 * for this to matter.
		 */
		VM_PAGE_QUEUES_REMOVE(mem);
		vm_page_activate(mem);
	}
	if (mem->active || mem->inactive || mem->speculative) {
		if (mem->object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!mem->object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
	*mem = vm_page_template;
	mem->phys_page = phys_page;
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

static vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}


/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone itself.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	/* Increment zone page count. We account for all memory managed by the zone in z->page_count */
	OSAddAtomic64(1, &(vm_page_zone->page_count));

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}


/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;
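
/*
 * Illustrative note (not part of the original source): a driver that must
 * feed a 32-bit DMA engine would obtain its pages through vm_page_grablo()
 * below instead of vm_page_grab(), so that each page's phys_page stays at
 * or below max_valid_low_ppnum.  This is only a conceptual sketch; real
 * drivers normally reach this path through the IOKit DMA machinery rather
 * than by calling the routine directly.
 */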
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(mem->phys_page));

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 * first try to grab a page from the per-cpu free list...
 * this must be done while pre-emption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move to the per-cpu queue...
 * return 1 of these pages when done... only wakeup the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */
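
/*
 * Illustrative sketch (not part of the original source) of the fast path the
 * comment above describes, with error handling and assertions omitted:
 *
 *	disable_preemption();
 *	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
 *		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
 *		enable_preemption();
 *		return mem;		// no global lock taken at all
 *	}
 *	// otherwise take vm_page_queue_free_lock and refill the per-cpu
 *	// magazine from the global per-color queues, as done below.
 */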
vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;

	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;

		enable_preemption();
		mem->pageq.next = NULL;

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(pmap_verify_free(mem->phys_page));
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!pmap_is_noencrypt(mem->phys_page));

		return mem;
	}
	enable_preemption();

	/*
	 * Optionally produce warnings if the wire or gobble
	 * counts exceed some threshold.
	 */
#if VM_PAGE_WIRE_COUNT_WARNING
	if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
		       vm_page_wire_count);
	}
#endif
#if VM_PAGE_GOBBLE_COUNT_WARNING
	if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
		       vm_page_gobble_count);
	}
#endif
	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 * Only let privileged threads (involved in pageout)
	 * dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;
	} else {
		vm_page_t	head;
		vm_page_t	tail;
		unsigned int	pages_to_steal;
		unsigned int	color;

		while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else {
			if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
				pages_to_steal = vm_free_magazine_refill_limit;
			else
				pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
		}
		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		vm_page_free_count -= pages_to_steal;

		while (pages_to_steal--) {

			while (queue_empty(&vm_page_queue_free[color]))
				color = (color + 1) & vm_color_mask;

			queue_remove_first(&vm_page_queue_free[color],
					   mem, vm_page_t, pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			assert(!mem->active);
			assert(!mem->inactive);
			assert(!mem->throttled);
			assert(!mem->speculative);

			color = (color + 1) & vm_color_mask;

			if (head == NULL)
				head = mem;
			else
				tail->pageq.next = (queue_t)mem;
			tail = mem;

			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
			assert(mem->tabled == FALSE);
			assert(mem->object == VM_OBJECT_NULL);
			assert(!mem->laundry);
			assert(pmap_verify_free(mem->phys_page));
			assert(!mem->encrypted);
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
			assert(!pmap_is_noencrypt(mem->phys_page));
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);

		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		mem->pageq.next = NULL;

		enable_preemption();
	}
	/*
	 * Decide if we should poke the pageout daemon.
	 * We do this if the free count is less than the low
	 * water mark, or if the free count is less than the high
	 * water mark (but above the low water mark) and the inactive
	 * count is less than its target.
	 *
	 * We don't have the counts locked ... if they change a little,
	 * it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}
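/*
 * Illustrative sketch (not part of the kernel build): a minimal user-space
 * model of the per-cpu "magazine" strategy described above vm_page_grab() --
 * try a lock-free per-thread cache first, and only take the global free-list
 * lock to refill the cache in a batch.  The names below (free_head, cache,
 * REFILL_LIMIT) are hypothetical stand-ins, with REFILL_LIMIT playing the
 * role of vm_free_magazine_refill_limit.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

#define REFILL_LIMIT 64			/* batch size moved per refill */

struct page { struct page *next; };

static struct page	*free_head;	/* global free list */
static unsigned int	free_count;
static pthread_mutex_t	free_lock = PTHREAD_MUTEX_INITIALIZER;

static __thread struct page *cache;	/* per-thread "magazine" */

static struct page *
page_grab(void)
{
	struct page *p;

	if ((p = cache)) {		/* fast path: no lock taken */
		cache = p->next;
		return p;
	}
	pthread_mutex_lock(&free_lock);
	for (unsigned int n = 0; n < REFILL_LIMIT && free_head; n++) {
		p = free_head;		/* move a batch onto the local cache */
		free_head = p->next;
		free_count--;
		p->next = cache;
		cache = p;
	}
	pthread_mutex_unlock(&free_lock);

	if ((p = cache))		/* satisfy the request locally */
		cache = p->next;
	return p;			/* NULL if the pool was empty */
}
#endif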
/*
 * vm_page_release:
 *
 * Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
	// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(mem->phys_page);

	lck_mtx_lock_spin(&vm_page_queue_free_lock);
#if DEBUG
	if (mem->free)
		panic("vm_page_release");
#endif
	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem, vm_page_t, pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;
	} else {
		mem->lopage = FALSE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem, vm_page_t, pageq);
		vm_page_free_count++;
		/*
		 * Check if we should wake up someone waiting for a page.
		 * But don't bother waking them unless they can allocate.
		 *
		 * We wakeup only one thread, to prevent starvation.
		 * Because the scheduling system handles wait queues FIFO,
		 * if we wakeup all waiting threads, one greedy thread
		 * can starve multiple niceguy threads.  When the threads
		 * all wakeup, the greedy thread runs first, grabs the page,
		 * and waits for another page.  It will be the first to run
		 * when the next page is freed.
		 *
		 * However, there is a slight danger here.
		 * The thread we wake might not use the free page.
		 * Then the other threads could wait indefinitely
		 * while the page goes unused.  To forestall this,
		 * the pageout daemon will keep making free pages
		 * as long as vm_page_free_wanted is non-zero.
		 */
		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
/*
 * This version of vm_page_release() is used only at startup
 * when we are single-threaded and pages are being released
 * for the first time.  Hence, no locking or unnecessary checks are made.
 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
	register vm_page_t	mem)
{
	queue_t		queue_free;

	if (vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		vm_lopage_free_count++;
		queue_free = &vm_lopage_queue_free;
	} else {
		mem->lopage = FALSE;
		vm_page_free_count++;
		queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
	}
	queue_enter_first(queue_free, mem, vm_page_t, pageq);
}
/*
 * vm_page_wait:
 *
 * Wait for a page to become available.
 * If there are plenty of free pages, then we don't sleep.
 *
 * Returns:
 *	TRUE:  There may be another page, try again
 *	FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 * We can't use vm_page_free_reserved to make this
	 * determination.  Consider: some thread might
	 * need to allocate two pages.  The first allocation
	 * succeeds, the second fails.  After the first page is freed,
	 * a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING) {
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
				       vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
			wait_result = thread_block(THREAD_CONTINUE_NULL);
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
		}
		return (wait_result == THREAD_AWAKENED);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}
/*
 * vm_page_alloc:
 *
 * Allocate and return a memory cell associated
 * with this VM object/offset pair.
 *
 * Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

/*
 * vm_page_alloc_guard:
 *
 * Allocate a fictitious page which will be used
 * as a guard page.  The page will be inserted into
 * the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}
counter(unsigned int c_laundry_pages_freed = 0;)

/*
 * vm_page_free_prepare:
 *
 * Removes page from any queue it may be on
 * and disassociates it from its VM object.
 *
 * Object and page queues must be locked prior to entry.
 */
void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}

void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");
#endif	/* MACH_ASSERT || DEBUG */
	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
			    mem->object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = mem->object->vo_purgeable_owner;
				/*
				 * While wired, this page was accounted
				 * as "non-volatile" but it should now
				 * be accounted as "volatile".
				 */
				/* one less "non-volatile"... */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_nonvolatile,
					     PAGE_SIZE);
				/* ... and "phys_footprint" */
				ledger_debit(owner->ledger,
					     task_ledgers.phys_footprint,
					     PAGE_SIZE);
				/* one more "volatile" */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_volatile,
					      PAGE_SIZE);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);	/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}

/*
 * vm_page_free:
 *
 * Returns the given page to the free list,
 * disassociating it from any VM object.
 *
 * Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}

void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
/*
 * Free a list of pages.  The list can be up to several hundred pages,
 * as blocked up by vm_pageout_scan().
 * The big win is not having to take the free list lock once
 * per page.
 */
void
vm_page_free_list(
	vm_page_t	freeq,
	boolean_t	prepare_object)
{
	vm_page_t	mem;
	vm_page_t	nxt;
	vm_page_t	local_freeq;
	int		pg_count;

	while (freeq) {

		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {

			assert(!mem->inactive);
			assert(!mem->active);
			assert(!mem->throttled);
			assert(!mem->speculative);
			assert(!VM_PAGE_WIRED(mem));
			assert(mem->pageq.prev == NULL);

			nxt = (vm_page_t)(mem->pageq.next);

			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
				assert(pmap_verify_free(mem->phys_page));
			}
			if (prepare_object == TRUE)
				vm_page_free_prepare_object(mem, TRUE);

			if (!mem->fictitious) {
				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    mem->phys_page < max_valid_low_ppnum) {
					mem->pageq.next = NULL;
					vm_page_release(mem);
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock).  That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->pageq.next = (queue_entry_t)local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(mem->phys_page);
				}
			} else {
				assert(mem->phys_page == vm_page_fictitious_addr ||
				       mem->phys_page == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ( (mem = local_freeq) ) {
			unsigned int	avail_free_count;
			unsigned int	need_wakeup = 0;
			unsigned int	need_priv_wakeup = 0;

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {
				int	color;

				nxt = (vm_page_t)(mem->pageq.next);

				color = mem->phys_page & vm_color_mask;
				queue_enter_first(&vm_page_queue_free[color],
						  mem, vm_page_t, pageq);
				mem = nxt;
			}
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
					avail_free_count -= vm_page_free_wanted_privileged;
				}
			}
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int	available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
			}
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				thread_wakeup((event_t) &vm_page_free_count);
			} else for (; need_wakeup != 0; need_wakeup--) {
				/*
				 * Wake up one waiter per page we just released.
				 */
				thread_wakeup_one((event_t) &vm_page_free_count);
			}

			VM_CHECK_MEMORYSTATUS;
		}
	}
}
/*
 * vm_page_wire:
 *
 * Mark this page as wired down by yet
 * another map, removing it from paging queues
 * as necessary.
 *
 * The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{
	// dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if ( !VM_PAGE_WIRED(mem)) {

		if (mem->pageout_queue) {
			mem->pageout = FALSE;
			vm_pageout_throttle_up(mem);
		}
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
			    mem->object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = mem->object->vo_purgeable_owner;
				/* less volatile bytes */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_volatile,
					     PAGE_SIZE);
				/* more not-quite-volatile bytes */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_nonvolatile,
					      PAGE_SIZE);
				/* more footprint */
				ledger_credit(owner->ledger,
					      task_ledgers.phys_footprint,
					      PAGE_SIZE);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset + PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		VM_CHECK_MEMORYSTATUS;

		/*
		 * ENCRYPTED SWAP:
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
}
/*
 * vm_page_gobble:
 *
 * Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 * vm_page_unwire:
 *
 * Release one wiring of this page, potentially
 * enabling it to be paged again.
 *
 * The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{
	// dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
		     mem->object->purgable == VM_PURGABLE_EMPTY) &&
		    mem->object->vo_purgeable_owner != TASK_NULL) {
			task_t	owner;

			owner = mem->object->vo_purgeable_owner;
			/* more volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_volatile,
				      PAGE_SIZE);
			/* less not-quite-volatile bytes */
			ledger_debit(owner->ledger,
				     task_ledgers.purgeable_nonvolatile,
				     PAGE_SIZE);
			/* less footprint */
			ledger_debit(owner->ledger,
				     task_ledgers.phys_footprint,
				     PAGE_SIZE);
		}
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
/*
 * vm_page_deactivate:
 *
 * Returns the given page to the inactive list,
 * indicating that no physical maps have access
 * to this page.  [Used by the physical mapping system.]
 *
 * The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}

void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * This page is no longer very interesting.  If it was
	 * interesting (active or inactive/referenced), then we
	 * clear the reference bit and (re)enter it in the
	 * inactive queue.  Note wired pages should not have
	 * their reference bit cleared.
	 */
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
	    m->dirty && m->object->internal &&
	    (m->object->purgable == VM_PURGABLE_DENY ||
	     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     m->object->purgable == VM_PURGABLE_VOLATILE)) {
		queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
		m->throttled = TRUE;
		vm_page_throttled_count++;
	} else {
		if (m->object->named && m->object->ref_count == 1) {
			vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
			vm_page_speculative_recreated++;
#endif
		} else {
			VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
		}
	}
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */
void vm_page_enqueue_cleaned(vm_page_t m)
{
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->clean_queue = TRUE;
	vm_page_cleaned_count++;

	vm_page_inactive_count++;
	if (m->object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}

	vm_pageout_enqueued_cleaned++;
}
/*
 * vm_page_activate:
 *
 * Put the specified page on the active list (if appropriate).
 *
 * The page queues must be locked.
 */
void
vm_page_activate(
	register vm_page_t	m)
{
#ifdef FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

#if DEBUG
	if (m->active)
		panic("vm_page_activate: already active");
#endif

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {

		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
			if (m->object->internal) {
				vm_page_pageable_internal_count++;
			} else {
				vm_page_pageable_external_count++;
			}
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
}
/*
 * vm_page_speculate:
 *
 * Put the specified page on the speculative list (if appropriate).
 *
 * The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;
		if (m->object->internal) {
			vm_page_pageable_internal_count++;
		} else {
			vm_page_pageable_external_count++;
		}

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
}
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 * The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}
void
vm_page_lru(
	vm_page_t	m)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			assert(m->throttled);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;
			if (m->object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->throttled = FALSE;
			m->active = TRUE;
		}

		/*
		 * Transfer the entire throttled queue to a regular LRU page queue.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));

	vm_page_unlock_queues();
}
/*
 * move pages from the indicated local queue to the global active queue
 * its ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */
void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->local = FALSE;
			m->active = TRUE;

			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to a regular LRU page queue.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		vm_page_pageable_internal_count += lq->vpl_internal_count;
		vm_page_pageable_external_count += lq->vpl_external_count;
		lq->vpl_count = 0;
		lq->vpl_internal_count = 0;
		lq->vpl_external_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
/*
 * vm_page_part_zero_fill:
 *
 * Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	vm_page_t	tmp;

	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if (m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if ((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				  m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp, m);
	VM_PAGE_FREE(tmp);
#endif
}
/*
 * vm_page_zero_fill:
 *
 * Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    m->object, m->offset, m, 0, 0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

	// dbgTrace(0xAEAEAEAE, m->phys_page, 0);	/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}
/*
 * vm_page_part_copy:
 *
 * copy part of one page to another
 */
void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(src_m->phys_page, src_pa,
			    dst_m->phys_page, dst_pa, len);
}
/*
 * vm_page_copy:
 *
 * Copy one page to another
 *
 * ENCRYPTED SWAP:
 * The source page should not be encrypted.  The caller should
 * make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m->object, src_m->offset,
	    dest_m->object, dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m->object);

	/*
	 * ENCRYPTED SWAP:
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;

		if (!was_busy) {
			src_m->busy = TRUE;
		}
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error;	/* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
3767 printf("vm_page %p: \n", p
);
3768 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3769 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3770 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p
->next_m
));
3771 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3772 printf(" wire_count=%u\n", p
->wire_count
);
3774 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3775 (p
->local
? "" : "!"),
3776 (p
->inactive
? "" : "!"),
3777 (p
->active
? "" : "!"),
3778 (p
->pageout_queue
? "" : "!"),
3779 (p
->speculative
? "" : "!"),
3780 (p
->laundry
? "" : "!"));
3781 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3782 (p
->free
? "" : "!"),
3783 (p
->reference
? "" : "!"),
3784 (p
->gobbled
? "" : "!"),
3785 (p
->private ? "" : "!"),
3786 (p
->throttled
? "" : "!"));
3787 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3788 (p
->busy
? "" : "!"),
3789 (p
->wanted
? "" : "!"),
3790 (p
->tabled
? "" : "!"),
3791 (p
->fictitious
? "" : "!"),
3792 (p
->pmapped
? "" : "!"),
3793 (p
->wpmapped
? "" : "!"));
3794 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3795 (p
->pageout
? "" : "!"),
3796 (p
->absent
? "" : "!"),
3797 (p
->error
? "" : "!"),
3798 (p
->dirty
? "" : "!"),
3799 (p
->cleaning
? "" : "!"),
3800 (p
->precious
? "" : "!"),
3801 (p
->clustered
? "" : "!"));
3802 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3803 (p
->overwriting
? "" : "!"),
3804 (p
->restart
? "" : "!"),
3805 (p
->unusual
? "" : "!"),
3806 (p
->encrypted
? "" : "!"),
3807 (p
->encrypted_cleaning
? "" : "!"));
3808 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3809 (p
->cs_validated
? "" : "!"),
3810 (p
->cs_tainted
? "" : "!"),
3811 (p
->no_cache
? "" : "!"));
3813 printf("phys_page=0x%x\n", p
->phys_page
);
/*
 * Check that the list of pages is ordered by
 * ascending physical address and has no holes.
 */
static unsigned int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return 1;
}
/*
 * Check the free lists for proper length etc.
 */
static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	if (! vm_page_verify_this_free_list_enabled)
		return 0;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m,
		      vm_page_t,
		      pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( !m->busy )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
		}
		if ( !m->free )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
			      color, npages, m);
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}
static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;
	boolean_t	toggle = TRUE;

	if (! vm_page_verify_all_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_verify_this_free_list_enabled == TRUE) {
		/*
		 * This variable has been set globally for extra checking of
		 * each free list Q.  Since we didn't set it, we don't own it
		 * and we shouldn't toggle it.
		 */
		toggle = FALSE;
	}

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = TRUE;
	}

	for ( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists:  "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = FALSE;
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
		assert(!mem->pageout_queue);
	}
}
#endif	/* MACH_ASSERT */
/*
 * CONTIGUOUS PAGE ALLOCATION
 *
 * Find a region large enough to contain at least n pages
 * of contiguous physical memory.
 *
 * This is done by traversing the vm_page_t array in a linear fashion.
 * We assume that the vm_page_t array has the available physical pages in an
 * ordered, ascending list... this is currently true of all our implementations
 * and must remain so... there can be 'holes' in the array... we also can
 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 * which used to happen via 'vm_page_convert'... that function was no longer
 * being called and was removed...
 *
 * The basic flow consists of stabilizing some of the interesting state of
 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 * sweep at the beginning of the array looking for pages that meet our criteria
 * for a 'stealable' page... currently we are pretty conservative... if the page
 * meets this criteria and is physically contiguous to the previous page in the 'run'
 * we keep developing it.  If we hit a page that doesn't fit, we reset our state
 * and start to develop a new run... if at this point we've already considered
 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 * and mutex_pause (which will yield the processor), to keep the latency low w/r
 * to other threads trying to acquire free pages (or move pages from q to q),
 * and then continue from the spot we left off... we only make 1 pass through the
 * array.  Once we have a 'run' that is long enough, we'll go into the loop
 * which steals the pages from the queues they're currently on... pages on the free
 * queue can be stolen directly... pages that are on any of the other queues
 * must be removed from the object they are tabled on... this requires taking the
 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 * or if the state of the page behind the vm_object lock is no longer viable, we'll
 * dump the pages we've currently stolen back to the free list, and pick up our
 * scan from the point where we aborted the 'current' run.
 *
 * Assumptions:
 *	- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */
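/*
 * Illustrative sketch (not compiled): the run-building scan described above,
 * reduced to its core.  Walk an array of page records ordered by physical
 * address, extend the current run while each page is contiguous with the
 * previous one and "stealable", and reset the run otherwise.  The struct,
 * field and helper names (page_rec, phys_addr, page_is_stealable) are
 * hypothetical simplifications of the real checks below.
 */
#if 0
static int
find_contiguous_run(const struct page_rec *pages, unsigned int page_count,
		    unsigned int want, unsigned int *run_start)
{
	unsigned int npages = 0;
	unsigned long prev_pa = (unsigned long)-2;	/* never adjacent to page 0 */

	for (unsigned int i = 0; i < page_count && npages < want; i++) {
		if (!page_is_stealable(&pages[i])) {	/* wired, busy, etc. */
			npages = 0;			/* abandon the current run */
			prev_pa = (unsigned long)-2;
			continue;
		}
		if (pages[i].phys_addr != prev_pa + 1) {
			npages = 1;			/* start a new run here */
			*run_start = i;
		} else {
			npages++;			/* extend the current run */
		}
		prev_pa = pages[i].phys_addr;
	}
	return npages == want;
}
#endif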
#define MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END


/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum = 0;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
	int		stolen_pages = 0;
	int		compressed_pages = 0;

	if (contig_pages == 0)
		return VM_PAGE_NULL;

	vm_page_verify_free_lists();

	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);

	PAGE_REPLACEMENT_ALLOWED(TRUE);

	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	RESET_STATE_OF_RUN();

	scanned = 0;
	considered = 0;
	free_available = vm_page_free_count - vm_page_free_reserved;

	wrapped = FALSE;

	if (flags & KMA_LOMEM)
		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
	else
		idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

	orig_last_idx = idx_last_contig_page_found;
	last_idx = orig_last_idx;
	for (page_idx = last_idx, start_idx = last_idx;
	     npages < contig_pages && page_idx < vm_pages_count;
	     page_idx++) {
retry:
		if (wrapped &&
		    npages == 0 &&
		    page_idx >= orig_last_idx) {
			/*
			 * We're back where we started and we haven't
			 * found any suitable contiguous range.  Let's
			 * give up.
			 */
			break;
		}
		scanned++;
		m = &vm_pages[page_idx];

		assert(!m->fictitious);
		assert(!m->private);

		if (max_pnum && m->phys_page > max_pnum) {
			/* no more low pages... */
			break;
		}
		if (!npages & ((m->phys_page & pnum_mask) != 0)) {
			/*
			 * not aligned
			 */
			RESET_STATE_OF_RUN();

		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
			   m->encrypted_cleaning ||
			   m->pageout_queue || m->laundry || m->wanted ||
			   m->cleaning || m->overwriting || m->pageout) {
			/*
			 * page is in a transient state
			 * or a state we don't want to deal
			 * with, so don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
			/*
			 * page needs to be on one of our queues
			 * or it needs to belong to the compressor pool
			 * in order for it to be stable behind the
			 * locks we hold at this point...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && (!m->tabled || m->busy)) {
			/*
			 * pages on the free list are always 'busy'
			 * so we couldn't test for 'busy' in the check
			 * for the transient states... pages that are
			 * 'free' are never 'tabled', so we also couldn't
			 * test for 'tabled'.  So we check here to make
			 * sure that a non-free page is not busy and is
			 * tabled on an object...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else {
			if (m->phys_page != prevcontaddr + 1) {
				if ((m->phys_page & pnum_mask) != 0) {
					RESET_STATE_OF_RUN();
					goto did_consider;
				} else {
					npages = 1;
					start_idx = page_idx;
					start_pnum = m->phys_page;
				}
			} else {
				npages++;
			}
			prevcontaddr = m->phys_page;

			if (m->free) {
				free_considered++;
			} else {
				/*
				 * This page is not free.
				 * If we can't steal used pages,
				 * we have to give up this run
				 * and keep looking.
				 * Otherwise, we might need to
				 * move the contents of this page
				 * into a substitute page.
				 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
				if (m->pmapped || m->dirty || m->precious) {
					substitute_needed++;
				}
#else
				RESET_STATE_OF_RUN();
#endif
			}

			if ((free_considered + substitute_needed) > free_available) {
				/*
				 * if we let this run continue
				 * we will end up dropping the vm_page_free_count
				 * below the reserve limit... we need to abort
				 * this run, but we can at least re-consider this
				 * page... thus the jump back to 'retry'
				 */
				RESET_STATE_OF_RUN();

				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
					considered++;
					goto retry;
				}
				/*
				 * free_available == 0
				 * so can't consider any free pages... if
				 * we went to retry in this case, we'd
				 * get stuck looking at the same page
				 * w/o making any forward progress
				 * we also want to take this path if we've already
				 * reached our limit that controls the lock latency
				 */
			}
		}
did_consider:
		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

			PAGE_REPLACEMENT_ALLOWED(FALSE);

			lck_mtx_unlock(&vm_page_queue_free_lock);
			vm_page_unlock_queues();

			mutex_pause(0);

			PAGE_REPLACEMENT_ALLOWED(TRUE);

			vm_page_lock_queues();
			lck_mtx_lock(&vm_page_queue_free_lock);

			RESET_STATE_OF_RUN();
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			considered = 0;

			goto retry;
		}
		considered++;
	}
	if (npages != contig_pages) {
		if (!wrapped) {
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();
			if (flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
			last_idx = 0;
			page_idx = last_idx;
			wrapped = TRUE;
			goto retry;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		vm_page_t	m1;
		vm_page_t	m2;
		unsigned int	cur_idx;
		unsigned int	tmp_start_idx;
		vm_object_t	locked_object = VM_OBJECT_NULL;
		boolean_t	abort_run = FALSE;

		assert(page_idx - start_idx == contig_pages);

		tmp_start_idx = start_idx;

		/*
		 * first pass through to pull the free pages
		 * off of the free queue so that in case we
		 * need substitute pages, we won't grab any
		 * of the free pages in the run... we'll clear
		 * the 'free' bit in the 2nd pass, and even in
		 * an abort_run case, we'll collect all of the
		 * free pages in this run and return them to the free list
		 */
		while (start_idx < page_idx) {

			m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
			assert(m1->free);
#endif

			if (m1->free) {
				unsigned int color;

				color = m1->phys_page & vm_color_mask;

				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);

				queue_remove(&vm_page_queue_free[color],
					     m1, vm_page_t, pageq);
				m1->pageq.next = NULL;
				m1->pageq.prev = NULL;

				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);

				/*
				 * Clear the "free" bit so that this page
				 * does not get considered for another
				 * concurrent physically-contiguous allocation.
				 */
				m1->free = FALSE;

				vm_page_free_count--;
			}
		}
		if (flags & KMA_LOMEM)
			vm_page_lomem_find_contiguous_last_idx = page_idx;
		else
			vm_page_find_contiguous_last_idx = page_idx;

		/*
		 * we can drop the free queue lock at this point since
		 * we've pulled any 'free' candidates off of the list
		 * we need it dropped so that we can do a vm_page_grab
		 * when substituting for pmapped/dirty pages
		 */
		lck_mtx_unlock(&vm_page_queue_free_lock);
4367 start_idx
= tmp_start_idx
;
4368 cur_idx
= page_idx
- 1;
4370 while (start_idx
++ < page_idx
) {
4372 * must go through the list from back to front
4373 * so that the page list is created in the
4374 * correct order - low -> high phys addresses
4376 m1
= &vm_pages
[cur_idx
--];
4380 if (m1
->object
== VM_OBJECT_NULL
) {
4382 * page has already been removed from
4383 * the free list in the 1st pass
4385 assert(m1
->offset
== (vm_object_offset_t
) -1);
4387 assert(!m1
->wanted
);
4388 assert(!m1
->laundry
);
4392 boolean_t disconnected
, reusable
;
4394 if (abort_run
== TRUE
)
4397 object
= m1
->object
;
4399 if (object
!= locked_object
) {
4400 if (locked_object
) {
4401 vm_object_unlock(locked_object
);
4402 locked_object
= VM_OBJECT_NULL
;
4404 if (vm_object_lock_try(object
))
4405 locked_object
= object
;
4407 if (locked_object
== VM_OBJECT_NULL
||
4408 (VM_PAGE_WIRED(m1
) || m1
->gobbled
||
4409 m1
->encrypted_cleaning
||
4410 m1
->pageout_queue
|| m1
->laundry
|| m1
->wanted
||
4411 m1
->cleaning
|| m1
->overwriting
|| m1
->pageout
|| m1
->busy
)) {
4413 if (locked_object
) {
4414 vm_object_unlock(locked_object
);
4415 locked_object
= VM_OBJECT_NULL
;
4417 tmp_start_idx
= cur_idx
;
4422 disconnected
= FALSE
;
4425 if ((m1
->reusable
||
4426 m1
->object
->all_reusable
) &&
4430 /* reusable page... */
4431 refmod
= pmap_disconnect(m1
->phys_page
);
4432 disconnected
= TRUE
;
4435 * ... not reused: can steal
4436 * without relocating contents.
4446 vm_object_offset_t offset
;
4448 m2
= vm_page_grab();
4450 if (m2
== VM_PAGE_NULL
) {
4451 if (locked_object
) {
4452 vm_object_unlock(locked_object
);
4453 locked_object
= VM_OBJECT_NULL
;
4455 tmp_start_idx
= cur_idx
;
4459 if (! disconnected
) {
4461 refmod
= pmap_disconnect(m1
->phys_page
);
					/* copy the page's contents */
					pmap_copy_page(m1->phys_page, m2->phys_page);
					/* copy the page's state */
					assert(!VM_PAGE_WIRED(m1));
					assert(!m1->pageout_queue);
					assert(!m1->laundry);
					m2->reference	= m1->reference;
					assert(!m1->gobbled);
					assert(!m1->private);
					m2->no_cache	= m1->no_cache;
					assert(!m1->wanted);
					assert(!m1->fictitious);
					m2->pmapped	= m1->pmapped; /* should flush cache ? */
					m2->wpmapped	= m1->wpmapped;
					assert(!m1->pageout);
					m2->absent	= m1->absent;
					m2->error	= m1->error;
					m2->dirty	= m1->dirty;
					assert(!m1->cleaning);
					m2->precious	= m1->precious;
					m2->clustered	= m1->clustered;
					assert(!m1->overwriting);
					m2->restart	= m1->restart;
					m2->unusual	= m1->unusual;
					m2->encrypted	= m1->encrypted;
					assert(!m1->encrypted_cleaning);
					m2->cs_validated = m1->cs_validated;
					m2->cs_tainted	= m1->cs_tainted;

					/*
					 * If m1 had really been reusable,
					 * we would have just stolen it, so
					 * let's not propagate its "reusable"
					 * bit and assert that m2 is not
					 * marked as "reusable".
					 */
					// m2->reusable	= m1->reusable;
					assert(!m2->reusable);

					assert(!m1->lopage);
					m2->slid	= m1->slid;
					m2->compressor	= m1->compressor;
					/*
					 * page may need to be flushed if
					 * it is marshalled into a UPL
					 * that is going to be used by a device
					 * that doesn't support coherency
					 */
					m2->written_by_kernel = TRUE;

					/*
					 * make sure we clear the ref/mod state
					 * from the pmap layer... else we risk
					 * inheriting state from the last time
					 * this page was used...
					 */
					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

					if (refmod & VM_MEM_REFERENCED)
						m2->reference = TRUE;
					if (refmod & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(m2, TRUE);
					}
					offset = m1->offset;

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);

					/*
					 * now put the substitute page
					 * on the object
					 */
					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);

					if (m2->compressor) {
						m2->pmapped = TRUE;
						m2->wpmapped = TRUE;

						PMAP_ENTER(kernel_pmap, m2->offset, m2,
							   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
					} else {
						if (m2->reference)
							vm_page_activate(m2);
						else
							vm_page_deactivate(m2);
					}
					PAGE_WAKEUP_DONE(m2);

				} else {
					assert(!m1->compressor);

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);
				}
			}
			m1->pageq.next = (queue_entry_t) m;
			m1->pageq.prev = NULL;
			m = m1;
		}
		if (locked_object) {
			vm_object_unlock(locked_object);
			locked_object = VM_OBJECT_NULL;
		}

		if (abort_run == TRUE) {
			if (m != VM_PAGE_NULL) {
				vm_page_free_list(m, FALSE);
			}
			m = VM_PAGE_NULL;

			/*
			 * want the index of the last
			 * page in this run that was
			 * successfully 'stolen', so back
			 * it up 1 for the auto-decrement on use
			 * and 1 more to bump back over this page
			 */
			page_idx = tmp_start_idx + 2;
			if (page_idx >= vm_pages_count) {
				page_idx = last_idx = 0;
			}
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();

			if (flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

			last_idx = page_idx;

			lck_mtx_lock(&vm_page_queue_free_lock);
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;

			goto retry;
		}
		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
			if (wire == TRUE)
				m1->wire_count++;
			else
				m1->gobbled = TRUE;
		}
		if (wire == FALSE)
			vm_page_gobble_count += npages;

		/*
		 * gobbled pages are also counted as wired pages
		 */
		vm_page_wire_count += npages;

		assert(vm_page_verify_contiguous(m, npages));
	}

	PAGE_REPLACEMENT_ALLOWED(FALSE);

	vm_page_unlock_queues();

	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

	tv_end_sec -= tv_start_sec;
	if (tv_end_usec < tv_start_usec) {
		tv_end_sec--;
		tv_end_usec += 1000000;
	}
	tv_end_usec -= tv_start_usec;
	if (tv_end_usec >= 1000000) {
		tv_end_sec++;
		tv_end_usec -= 1000000;
	}
	if (vm_page_find_contig_debug) {
		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
		       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
		       (long)tv_end_sec, tv_end_usec, orig_last_idx,
		       scanned, yielded, dumped_run, stolen_pages, compressed_pages);
	}

	vm_page_verify_free_lists();

	return m;
}
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original code
 */
void
vm_page_do_delayed_work(
	vm_object_t			object,
	struct vm_page_delayed_work	*dwp,
	int				dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {

		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;

/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define	HIBERNATE_XPMAPPED_LIMIT	40000
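
/*
 * With 4KB pages, HIBERNATE_XPMAPPED_LIMIT (40000 pages) corresponds to
 * roughly 156MB of executable, referenced, external pages that may be kept
 * in the image before further 'xpmapped' candidates are skipped (see
 * hibernate_consider_discard()).
 */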
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}
boolean_t hibernate_skip_external = FALSE;

static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct vm_pageout_queue *iq;
	struct vm_pageout_queue *eq;
	struct vm_pageout_queue *tq;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			}
			l_object = m_object;
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}

			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;
	int		qcount;
	vm_page_t	m;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (hibernate_flush_queue(&aq->age_q, qcount))
			return (1);
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
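
/*
 * Summary of the flush order driven above: the speculative queues first,
 * then inactive (minus anonymous/cleaned), anonymous, cleaned, and the
 * internal pageout queue; the active queue is flushed last, the internal
 * queue is drained again, and the external pageout queue is drained only
 * if it has not been marked to be skipped (hibernate_skip_external).
 */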
void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}
kern_return_t
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

			vm_compressor_flush();

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
		}
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_skipped_external,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t i;
	vm_page_t m;
	uint64_t start, end, timeout, nsec;
	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);

			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

void
hibernate_free_gobble_pages(void)
{
	vm_page_t m, next;
	uint32_t  count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
	vm_object_t object = NULL;
	int refmod_state;
	boolean_t discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			if (!preflight) hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			if (!preflight) hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			if (!preflight) hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			if (!preflight) hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in any way, ignore it
			 */
			if (!preflight) hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			if (!preflight) hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			if (!preflight) hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);

		if (discard == FALSE) {
			if (!preflight)
				hibernate_stats.cd_found_dirty++;
		} else if (m->xpmapped && m->reference && !object->internal) {
			if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
				if (!preflight)
					hibernate_stats.cd_found_xpmapped++;
				discard = FALSE;
			} else {
				if (!preflight)
					hibernate_stats.cd_skipped_xpmapped++;
			}
		}
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in any way, ignore
		 */
		return;

#if MACH_ASSERT || DEBUG
	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */
#endif	/* MACH_ASSERT || DEBUG */

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		if (m->object->purgeable_when_ripe) {
			vm_purgeable_token_delete_first(old_queue);
		}
		m->object->purgable = VM_PURGABLE_EMPTY;

		/*
		 * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
		 * accounted in the "volatile" ledger, so no change here.
		 * We have to update vm_page_purgeable_count, though, since we're
		 * effectively purging this object.
		 */
		unsigned int delta;
		assert(m->object->resident_page_count >= m->object->wired_page_count);
		delta = (m->object->resident_page_count - m->object->wired_page_count);
		assert(vm_page_purgeable_count >= delta);
		OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
	}

	vm_page_free(m);

#if MACH_ASSERT || DEBUG
	vm_object_unlock(object);
#endif	/* MACH_ASSERT || DEBUG */
}
/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
}

void
hibernate_vm_unlock_queues(void)
{
	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
}
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   boolean_t preflight,
			   boolean_t will_discard,
			   uint32_t * pagesOut)
{
	uint64_t start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t pages = page_list->page_count;
	uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
	uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
	uint32_t count_wire = pages;
	uint32_t count_discard_active    = 0;
	uint32_t count_discard_inactive  = 0;
	uint32_t count_discard_cleaned   = 0;
	uint32_t count_discard_purgeable = 0;
	uint32_t count_discard_speculative = 0;
	uint32_t count_discard_vm_struct_pages = 0;
	uint32_t i;
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;
	boolean_t            discard_all;
	boolean_t            discard;

	HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);

	if (preflight) {
		page_list = NULL;
		page_list_wired = NULL;
		page_list_pal = NULL;
		discard_all = FALSE;
	} else {
		discard_all = will_discard;
	}

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		vm_page_lock_queues();
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_LOCK(&lq->vpl_lock);
			}
		}
	}
#endif	/* MACH_ASSERT || DEBUG */


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	if (!preflight) {
		hibernate_page_list_zero(page_list);
		hibernate_page_list_zero(page_list_wired);
		hibernate_page_list_zero(page_list_pal);

		hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
		hibernate_stats.cd_pages = pages;
	}

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, !preflight);
	}

	if (preflight) {
		vm_object_lock(compressor_object);
		vm_page_lock_queues();
		lck_mtx_lock(&vm_page_queue_free_lock);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
		m = (vm_page_t) m->pageq.next;
	}

	if (!preflight) for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			if (!preflight) {
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_total_free++;
			}
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		    && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_throttled++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		    && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_anonymous++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		    && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			discard = discard_all;
		}
		else
			count_cleaned++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		    && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			discard = discard_all;
		}
		else
			count_active++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		    && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_inactive++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			discard = FALSE;
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			    && hibernate_consider_discard(m, preflight))
			{
				if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
				discard = discard_all;
			}
			else
				count_speculative++;
			count_wire--;
			if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
			if (discard) hibernate_discard_page(m);
			m = next;
		}
	}

	queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
	{
		count_compressor++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	if (preflight == FALSE && discard_all == TRUE) {
		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

		HIBLOG("hibernate_teardown started\n");
		count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
		HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

		pages -= count_discard_vm_struct_pages;
		count_wire -= count_discard_vm_struct_pages;

		hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	if (!preflight) {
		// pull wired from hibernate_bitmap
		bitmap = &page_list->bank_bitmap[0];
		bitmap_wired = &page_list_wired->bank_bitmap[0];
		for (bank = 0; bank < page_list->bank_count; bank++)
		{
			for (i = 0; i < bitmap->bitmapwords; i++)
				bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
			bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
			bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
		}
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
		count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n  %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
	       pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
	       discard_all ? "did" : "could",
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

	if (hibernate_stats.cd_skipped_xpmapped)
		HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

	if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_UNLOCK(&lq->vpl_lock);
			}
		}
		vm_page_unlock_queues();
	}
#endif	/* MACH_ASSERT || DEBUG */

	if (preflight) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		vm_page_unlock_queues();
		vm_object_unlock(compressor_object);
	}

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t  start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t  i;
	uint32_t  count_discard_active    = 0;
	uint32_t  count_discard_inactive  = 0;
	uint32_t  count_discard_purgeable = 0;
	uint32_t  count_discard_cleaned   = 0;
	uint32_t  count_discard_speculative = 0;


#if MACH_ASSERT || DEBUG
	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
#endif	/* MACH_ASSERT || DEBUG */

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

#if MACH_ASSERT || DEBUG
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();
#endif	/* MACH_ASSERT || DEBUG */

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
boolean_t       hibernate_paddr_map_inited = FALSE;
boolean_t       hibernate_rebuild_needed = FALSE;
unsigned int    hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t       hibernate_rebuild_hash_list = NULL;

unsigned int    hibernate_teardown_found_tabled_pages = 0;
unsigned int    hibernate_teardown_found_created_pages = 0;
unsigned int    hibernate_teardown_found_free_pages = 0;
unsigned int    hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;
	unsigned int		ppnm_sindx;
	unsigned int		ppnm_eindx;
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;
void
hibernate_create_paddr_map()
{
	unsigned int	i;
	ppnum_t		next_ppnum_in_run = 0;
	struct ppnum_mapping *ppnm = NULL;

	if (hibernate_paddr_map_inited == FALSE) {

		for (i = 0; i < vm_pages_count; i++) {

			if (ppnm)
				ppnm->ppnm_eindx = i;

			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

				ppnm = kalloc(sizeof(struct ppnum_mapping));

				ppnm->ppnm_next = ppnm_head;
				ppnm_head = ppnm;

				ppnm->ppnm_sindx = i;
				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
			}
			next_ppnum_in_run = vm_pages[i].phys_page + 1;
		}
		hibernate_paddr_map_inited = TRUE;
	}
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
	struct ppnum_mapping *ppnm = NULL;

	ppnm = ppnm_last_found;

	if (ppnm) {
		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
			goto done;
	}
	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
			ppnm_last_found = ppnm;
			break;
		}
	}
	if (ppnm == NULL)
		panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
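
/*
 * Worked example (illustrative numbers, not from the source): if one
 * ppnum_mapping covers vm_pages[100..200) with ppnm_base_paddr 0x8000, then
 * hibernate_lookup_paddr(150) returns 0x8000 + (150 - 100) = 0x8032.
 */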
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	addr64_t	saddr_aligned;
	addr64_t	eaddr_aligned;
	addr64_t	addr;
	ppnum_t		paddr;
	unsigned int	mark_as_unneeded_pages = 0;

	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
	eaddr_aligned = eaddr & ~PAGE_MASK_64;

	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

		paddr = pmap_find_phys(kernel_pmap, addr);

		hibernate_page_bitset(page_list,       TRUE, paddr);
		hibernate_page_bitset(page_list_wired, TRUE, paddr);

		mark_as_unneeded_pages++;
	}
	return (mark_as_unneeded_pages);
}
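
/*
 * Worked example for the alignment above (illustrative numbers, 4KB pages):
 * saddr 0x1234 rounds up to saddr_aligned 0x2000 and eaddr 0x5FFF rounds
 * down to eaddr_aligned 0x5000, so exactly the fully-contained pages at
 * 0x2000, 0x3000 and 0x4000 are marked and the function returns 3.
 */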
void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;

	assert(mem->hashed);
	assert(mem->object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 *	Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
void
hibernate_free_range(int sindx, int eindx)
{
	vm_page_t	mem;
	unsigned int	color;

	while (sindx < eindx) {
		mem = &vm_pages[sindx];

		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;

		sindx++;
	}
}
extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
		mem->next_m = VM_PAGE_PACK_PTR(NULL);

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->hashed)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were entered in the hash,
	 * but were not located in the vm_pages array... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

		mem->next_m = VM_PAGE_PACK_PTR(NULL);
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem, vm_page_t, pageq);

			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to its new home
				 */
				vm_pages[compact_target_indx] = *mem;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}
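
/*
 * Sketch of the compaction scheme above (descriptive only): free vm_page_t
 * slots are pulled off the free queues and in-use entries are slid down into
 * those holes, with each entry stashing its original location in next_m via
 * VM_PAGE_PACK_PTR.  hibernate_rebuild_vm_structs() later walks the compacted
 * range backwards, copies each entry back to the index recorded in next_m,
 * and re-initializes the intervening holes as free pages via
 * hibernate_free_range().
 */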
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t *info,
	unsigned int count)
{
	unsigned int i;
	lck_spin_t   *bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
#if VM_PAGE_BUCKETS_CHECK
void
vm_page_buckets_check(void)
{
	unsigned int i;
	vm_page_t p;
	unsigned int p_hash;
	vm_page_bucket_t *bucket;
	lck_spin_t   *bucket_lock;

	if (!vm_page_buckets_check_ready) {
		return;
	}

#if HIBERNATION
	if (hibernate_rebuild_needed ||
	    hibernate_rebuild_hash_list) {
		panic("BUCKET_CHECK: hibernation in progress: "
		      "rebuild_needed=%d rebuild_hash_list=%p\n",
		      hibernate_rebuild_needed,
		      hibernate_rebuild_hash_list);
	}
#endif /* HIBERNATION */

#if VM_PAGE_FAKE_BUCKETS
	char *cp;
	for (cp = (char *) vm_page_fake_buckets_start;
	     cp < (char *) vm_page_fake_buckets_end;
	     cp++) {
		if (*cp != 0x5a) {
			panic("BUCKET_CHECK: corruption at %p in fake buckets "
			      "[0x%llx:0x%llx]\n",
			      cp,
			      (uint64_t) vm_page_fake_buckets_start,
			      (uint64_t) vm_page_fake_buckets_end);
		}
	}
#endif /* VM_PAGE_FAKE_BUCKETS */

	for (i = 0; i < vm_page_bucket_count; i++) {
		bucket = &vm_page_buckets[i];
		if (!bucket->page_list) {
			continue;
		}

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);
		p = VM_PAGE_UNPACK_PTR(bucket->page_list);
		while (p != VM_PAGE_NULL) {
			if (!p->hashed) {
				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
				      "hash %d in bucket %d at %p "
				      "is not hashed\n",
				      p, p->object, p->offset,
				      p_hash, i, bucket);
			}
			p_hash = vm_page_hash(p->object, p->offset);
			if (p_hash != i) {
				panic("BUCKET_CHECK: corruption in bucket %d "
				      "at %p: page %p object %p offset 0x%llx "
				      "hash %d\n",
				      i, bucket, p, p->object, p->offset,
				      p_hash);
			}
			p = VM_PAGE_UNPACK_PTR(p->next_m);
		}
		lck_spin_unlock(bucket_lock);
	}

//	printf("BUCKET_CHECK: checked buckets\n");
}
#endif /* VM_PAGE_BUCKETS_CHECK */