/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t	page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;
#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;
/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
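/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * checks in vm_set_page_size() rely on the power-of-two identities
 * (mask & size) == 0 and mask == size - 1.  The standalone helper below
 * (hypothetical name: derive_page_geometry) shows the same shift/mask
 * derivation in isolation.
 */
#if 0	/* example only, never compiled */
#include <stdint.h>

struct page_geometry {
	uintptr_t	size;	/* must be a power of two */
	uintptr_t	mask;	/* size - 1 */
	unsigned	shift;	/* log2(size) */
};

static int
derive_page_geometry(uintptr_t size, struct page_geometry *g)
{
	unsigned shift;

	if (size == 0 || (size & (size - 1)) != 0)
		return -1;			/* not a power of two */
	for (shift = 0; ((uintptr_t)1 << shift) != size; shift++)
		continue;
	g->size  = size;
	g->mask  = size - 1;			/* (mask & size) == 0 holds here */
	g->shift = shift;
	return 0;
}
#endif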
#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}
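/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * because vm_page_set_colors() forces the color count to a power of two,
 * vm_color_mask == vm_colors - 1 and "next color" selection reduces to a
 * single AND, which is what vm_page_grab() does below with
 * (color + 1) & vm_color_mask.  The helper name next_color is hypothetical.
 */
#if 0	/* example only, never compiled */
static unsigned int
next_color(unsigned int color, unsigned int color_mask)
{
	/* equivalent to (color + 1) % (color_mask + 1) when the count is a power of two */
	return (color + 1) & color_mask;
}
#endif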
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

	vm_compressor_init_locks();
}
void
vm_page_init_local_q()
{
	unsigned int		num_cpus;
	unsigned int		i;
	struct vplq		*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
			lq->vpl_count = 0;
			lq->vpl_internal_count = 0;
			lq->vpl_external_count = 0;
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->compressor = FALSE;
	m->written_by_kernel = FALSE;
	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
#endif
	}
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_cleaned);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_anonymous);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	kernel_debug_string("zone_steal_memory");
	zone_steal_memory();
	kernel_debug_string("vm_map_steal_memory");
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;
	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *		hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set the low bit - it must always be 1 to ensure a unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
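	/*
	 * Editor's note (worked example, not in the original source): with
	 * roughly 500,000 physical pages the sizing loop above picks
	 * vm_page_bucket_count = 524288 (2^19), so vm_page_hash_mask is
	 * 0x7ffff and, with BUCKETS_PER_LOCK == 16, there are 32768 bucket
	 * locks.  Because the count is a power of two,
	 * (vm_page_hash_mask & vm_page_bucket_count) == 0 and the warning
	 * above stays quiet.
	 */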
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
		vm_map_round_page((vm_page_fake_buckets_start +
				   (vm_page_bucket_count *
				    sizeof (vm_page_bucket_t))),
				  PAGE_MASK);
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	     cp < (char *)vm_page_fake_buckets_end;
	     cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
	kernel_debug_string("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	kernel_debug_string("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	kernel_debug_string("vm_page_bootstrap complete");
	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size up to an integral multiple of sizeof (void *).
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
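	/*
	 * Editor's note (worked example, not in the original source): with
	 * sizeof (void *) == 8, a request of 13 bytes becomes
	 * (13 + 7) & ~7 == 16, while a request that is already a multiple
	 * of 8 is unchanged, e.g. (16 + 7) & ~7 == 16.
	 */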
	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}

void vm_page_release_startup(vm_page_t mem);

void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

#if defined(__LP64__)
	/*
	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
	 */
	assert(sizeof(struct vm_page) == 64);

	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
	/*
	 *	Initialize the page frames.
	 */
	kernel_debug_string("Initialize the page frames");
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if defined(__LP64__)

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
	kernel_debug_string("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;
#if 0
	{
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;						/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
		queue_iterate(&vm_page_queue_free[i],
			      xx,
			      vm_page_t,
			      pageq) {	/* BRINGUP */
			j++;						/* (BRINGUP) */
			if(j > vm_page_free_count) {			/* (BRINGUP) */
				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
			}

			l = vm_page_free_count - j;			/* (BRINGUP) */
			k = 0;						/* (BRINGUP) */

			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

			for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
				k++;
				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
				}
			}

			xxl = xx;
		}
	}

	if(j != vm_page_free_count) {				/* (BRINGUP) */
		panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
	}
#endif

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}
/*
 *	Routine:	vm_page_create
 *
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		m->fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
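/*
 * Editor's note -- illustrative sketch, not part of the original file: a
 * lookup maps the (object, offset) pair to a bucket index with
 * vm_page_hash() and then to the spin lock covering that bucket, since each
 * lock protects BUCKETS_PER_LOCK consecutive buckets.  The helper name
 * bucket_and_lock_index is hypothetical.
 */
#if 0	/* example only, never compiled */
static void
bucket_and_lock_index(
	vm_object_t		object,
	vm_object_offset_t	offset,
	unsigned int		*bucket_idx,
	unsigned int		*lock_idx)
{
	unsigned int hash_id = vm_page_hash(object, offset);

	*bucket_idx = hash_id;				/* index into vm_page_buckets[] */
	*lock_idx   = hash_id / BUCKETS_PER_LOCK;	/* index into vm_page_bucket_locks[] */
}
#endif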
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash,
	boolean_t		batch_pmap_op)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;
	task_t			owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);
#if 0
	/*
	 * we may not hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif

	assert(page_aligned(offset));

	/* the vm_submap_object is only a placeholder for submaps */
	assert(object != vm_submap_object);

	vm_object_lock_assert_exclusive(object);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
					: LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

	if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);
#endif
		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next_m = bucket->page_list;
		bucket->page_list = VM_PAGE_PACK_PTR(mem);
		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if     MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = TRUE;
		lck_spin_unlock(bucket_lock);
	}

	{
		unsigned int	cache_attr;

		cache_attr = object->wimg_bits & VM_WIMG_MASK;

		if (cache_attr != VM_WIMG_USE_DEFAULT) {
			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
		}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	if (object->internal) {
		OSAddAtomic(1, &vm_page_internal_count);
	} else {
		OSAddAtomic(1, &vm_page_external_count);
	}

	/*
	 * It wouldn't make sense to insert a "reusable" page in
	 * an object (the page would have been marked "reusable" only
	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
	 * in the object at that time).
	 * But a page could be inserted in a "all_reusable" object, if
	 * something faults it in (a vm_read() from another task or a
	 * "use-after-free" issue in user space, for example). It can
	 * also happen if we're relocating a page from that object to
	 * a different physical page during a physically-contiguous
	 * allocation.
	 */
	assert(!mem->reusable);
	if (mem->object->all_reusable) {
		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
	}

	if (object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = object->vo_purgeable_owner;
	}
	if (owner &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* more non-volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_nonvolatile,
			      PAGE_SIZE);
		/* more footprint */
		ledger_credit(owner->ledger,
			      task_ledgers.phys_footprint,
			      PAGE_SIZE);
	} else if (owner &&
		   (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* more volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_volatile,
			      PAGE_SIZE);
	}

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(+1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}

#if VM_OBJECT_TRACKING_OP_MODIFIED
	if (vm_object_tracking_inited &&
	    object->internal &&
	    object->resident_page_count == 0 &&
	    object->pager == NULL &&
	    object->shadow != NULL &&
	    object->shadow->copy == object) {
		void *bt[VM_OBJECT_TRACKING_BTDEPTH];
		int numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
				object,
				VM_OBJECT_TRACKING_OP_MODIFIED,
				bt,
				numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			for (prev = &this->next_m;
			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (mem->object->internal) {
#if DEBUG
		assert(vm_page_internal_count);
#endif /* DEBUG */
		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = mem->object->vo_purgeable_owner;
	}
	if (owner &&
	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */
		VM_PAGE_CHECK(mem);
#endif
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else {
		vm_page_lookup_miss++;
	}

	return (mem);
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	boolean_t	internal_to_external, external_to_internal;

	assert(mem->object != new_object);

	/*
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	if (mem->local) {
		/*
		 * it's much easier to get the vm_page_pageable_xxx accounting correct
		 * if we first move the page to the active queue... it's going to end
		 * up there anyway, and we don't do vm_page_rename's frequently enough
		 * for this to matter.
		 */
		VM_PAGE_QUEUES_REMOVE(mem);
		vm_page_activate(mem);
	}
	if (mem->active || mem->inactive || mem->speculative) {
		if (mem->object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!mem->object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	assert(phys_page);

#if	DEBUG
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
#endif
	*mem = vm_page_template;
	mem->phys_page = phys_page;
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

static vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}
/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so it fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2.  5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	/* Increment zone page count. We account for all memory managed by the zone in z->page_count */
	OSAddAtomic64(1, &(vm_page_zone->page_count));

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}



/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		mem->busy = TRUE;

		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(mem->phys_page));

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 * first try to grab a page from the per-cpu free list...
 * this must be done while pre-emption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move to the per-cpu queue...
 * return 1 of these pages when done... only wakeup the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */
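/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * fast path described above is a per-CPU "magazine" of free pages.  The
 * generic shape of that pattern is sketched below with hypothetical names
 * (cpu_cache_t, global_pop, refill_batch); the real vm_page_grab() also
 * disables preemption around the per-CPU list, honors the reserved-page
 * limits, and refills from the per-color global queues.
 */
#if 0	/* example only, never compiled */
typedef struct page page_t;
struct page { page_t *next; };

typedef struct {
	page_t	*free_list;	/* singly linked per-CPU cache */
} cpu_cache_t;

static page_t *
magazine_grab(
	cpu_cache_t	*cache,
	page_t		*(*global_pop)(void),	/* takes the global free-list lock */
	unsigned int	refill_batch)
{
	page_t	*p = cache->free_list;

	if (p != NULL) {			/* fast path: no global lock taken */
		cache->free_list = p->next;
		return p;
	}
	/* slow path: move a batch from the global queue into the per-CPU cache */
	while (refill_batch-- > 0) {
		page_t *q = global_pop();

		if (q == NULL)
			break;
		q->next = cache->free_list;
		cache->free_list = q;
	}
	p = cache->free_list;
	if (p != NULL)
		cache->free_list = p->next;
	return p;				/* NULL if the global queue was empty too */
}
#endif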
vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;


	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;

		enable_preemption();
		mem->pageq.next = NULL;

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(pmap_verify_free(mem->phys_page));
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!pmap_is_noencrypt(mem->phys_page));

		return mem;
	}
	enable_preemption();
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count
>= VM_PAGE_WIRE_COUNT_WARNING
) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count
);
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count
>= VM_PAGE_GOBBLE_COUNT_WARNING
) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count
);
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2104 if ((vm_page_free_count
< vm_page_free_reserved
) &&
2105 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock
);
        unsigned int    pages_to_steal;
        unsigned int    color;

        while ( vm_page_free_count == 0 ) {

            lck_mtx_unlock(&vm_page_queue_free_lock);
            /*
             * must be a privileged thread to be
             * in this state since a non-privileged
             * thread would have bailed if we were
             * under the vm_page_free_reserved mark
             */
            VM_PAGE_WAIT();
            lck_mtx_lock_spin(&vm_page_queue_free_lock);
        }

        disable_preemption();

        if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
            lck_mtx_unlock(&vm_page_queue_free_lock);

            /*
             * we got preempted and moved to another processor
             * or we got preempted and someone else ran and filled the cache
             */
            goto return_page_from_cpu_list;
        }
        if (vm_page_free_count <= vm_page_free_reserved)
            pages_to_steal = 1;
        else {
            if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
                pages_to_steal = vm_free_magazine_refill_limit;
            else
                pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
        }
        color = PROCESSOR_DATA(current_processor(), start_color);
        head = tail = NULL;

        vm_page_free_count -= pages_to_steal;

        while (pages_to_steal--) {

            while (queue_empty(&vm_page_queue_free[color]))
                color = (color + 1) & vm_color_mask;

            queue_remove_first(&vm_page_queue_free[color],
                               mem,
                               vm_page_t,
                               pageq);
            mem->pageq.next = NULL;
            mem->pageq.prev = NULL;

            assert(!mem->active);
            assert(!mem->inactive);
            assert(!mem->throttled);
            assert(!mem->speculative);

            color = (color + 1) & vm_color_mask;

            if (head == NULL)
                head = mem;
            else
                tail->pageq.next = (queue_t)mem;
            tail = mem;

            assert(mem->listq.next == NULL && mem->listq.prev == NULL);
            assert(mem->tabled == FALSE);
            assert(mem->object == VM_OBJECT_NULL);
            assert(!mem->laundry);

            assert(pmap_verify_free(mem->phys_page));
            assert(!mem->encrypted);
            assert(!mem->pmapped);
            assert(!mem->wpmapped);
            assert(!pmap_is_noencrypt(mem->phys_page));
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);

        PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
        PROCESSOR_DATA(current_processor(), start_color) = color;

        /*
         * satisfy this request
         */
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;

        mem = head;
        mem->pageq.next = NULL;

        enable_preemption();
    }
    /*
     *  Decide if we should poke the pageout daemon.
     *  We do this if the free count is less than the low
     *  water mark, or if the free count is less than the high
     *  water mark (but above the low water mark) and the inactive
     *  count is less than its target.
     *
     *  We don't have the counts locked ... if they change a little,
     *  it doesn't really matter.
     */
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    VM_CHECK_MEMORYSTATUS;

//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);  /* (TEST/DEBUG) */

    return mem;
}
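/*
 * For illustration only (not part of the original source): callers that
 * must have a page typically pair vm_page_grab() with a wait, e.g.
 *
 *      for (;;) {
 *              mem = vm_page_grab();
 *              if (mem != VM_PAGE_NULL)
 *                      break;
 *              VM_PAGE_WAIT();
 *      }
 *
 * where VM_PAGE_WAIT() is assumed to wrap vm_page_wait(THREAD_UNINT).
 * Only threads with TH_OPT_VMPRIV may dip below vm_page_free_reserved,
 * so an ordinary caller blocks here until the pageout daemon frees memory.
 */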
/*
 *  vm_page_release:
 *
 *  Return a page to the free list.
 */

void
vm_page_release(
    register vm_page_t  mem)
{
    unsigned int    color;
    int     need_wakeup = 0;
    int     need_priv_wakeup = 0;

    assert(!mem->private && !mem->fictitious);
    if (vm_page_free_verify) {
        assert(pmap_verify_free(mem->phys_page));
    }
//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);  /* (TEST/DEBUG) */

    pmap_clear_noencrypt(mem->phys_page);

    lck_mtx_lock_spin(&vm_page_queue_free_lock);
#if DEBUG
    if (mem->free)
        panic("vm_page_release");
#endif

    assert(!mem->laundry);
    assert(mem->object == VM_OBJECT_NULL);
    assert(mem->pageq.next == NULL &&
           mem->pageq.prev == NULL);
    assert(mem->listq.next == NULL &&
           mem->listq.prev == NULL);

    if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
        vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        /*
         * this exists to support hardware controllers
         * incapable of generating DMAs with more than 32 bits
         * of address on platforms with physical memory > 4G...
         */
        queue_enter_first(&vm_lopage_queue_free,
                          mem,
                          vm_page_t,
                          pageq);
        vm_lopage_free_count++;

        if (vm_lopage_free_count >= vm_lopage_free_limit)
            vm_lopage_refill = FALSE;

        mem->lopage = TRUE;
    } else {
        mem->lopage = FALSE;
        mem->free = TRUE;

        color = mem->phys_page & vm_color_mask;
        queue_enter_first(&vm_page_queue_free[color],
                          mem,
                          vm_page_t,
                          pageq);
        vm_page_free_count++;
        /*
         *  Check if we should wake up someone waiting for a page.
         *  But don't bother waking them unless they can allocate.
         *
         *  We wakeup only one thread, to prevent starvation.
         *  Because the scheduling system handles wait queues FIFO,
         *  if we wakeup all waiting threads, one greedy thread
         *  can starve multiple niceguy threads.  When the threads
         *  all wakeup, the greedy thread runs first, grabs the page,
         *  and waits for another page.  It will be the first to run
         *  when the next page is freed.
         *
         *  However, there is a slight danger here.
         *  The thread we wake might not use the free page.
         *  Then the other threads could wait indefinitely
         *  while the page goes unused.  To forestall this,
         *  the pageout daemon will keep making free pages
         *  as long as vm_page_free_wanted is non-zero.
         */

        assert(vm_page_free_count > 0);
        if (vm_page_free_wanted_privileged > 0) {
            vm_page_free_wanted_privileged--;
            need_priv_wakeup = 1;
        } else if (vm_page_free_wanted > 0 &&
                   vm_page_free_count > vm_page_free_reserved) {
            vm_page_free_wanted--;
            need_wakeup = 1;
        }
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);

    if (need_priv_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
    else if (need_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_count);

    VM_CHECK_MEMORYSTATUS;
}
/*
 * This version of vm_page_release() is used only at startup
 * when we are single-threaded and pages are being released
 * for the first time. Hence, no locking or unnecessary checks are made.
 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
    register vm_page_t  mem)
{
    queue_t     queue_free;

    if (vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        mem->lopage = TRUE;
        vm_lopage_free_count++;
        queue_free = &vm_lopage_queue_free;
    } else {
        mem->lopage = FALSE;
        mem->free = TRUE;
        vm_page_free_count++;
        queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
    }
    queue_enter_first(queue_free, mem, vm_page_t, pageq);
}
/*
 *  vm_page_wait:
 *
 *  Wait for a page to become available.
 *  If there are plenty of free pages, then we don't sleep.
 *
 *  Returns:
 *      TRUE:  There may be another page, try again
 *      FALSE: We were interrupted out of our wait, don't try again
 */

boolean_t
vm_page_wait(
    int interruptible )
{
    /*
     *  We can't use vm_page_free_reserved to make this
     *  determination.  Consider: some thread might
     *  need to allocate two pages.  The first allocation
     *  succeeds, the second fails.  After the first page is freed,
     *  a call to vm_page_wait must really block.
     */
    kern_return_t   wait_result;
    int             need_wakeup = 0;
    int             is_privileged = current_thread()->options & TH_OPT_VMPRIV;

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if (is_privileged && vm_page_free_count) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
    if (vm_page_free_count < vm_page_free_target) {

        if (is_privileged) {
            if (vm_page_free_wanted_privileged++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
        } else {
            if (vm_page_free_wanted++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
        counter(c_vm_page_wait_block++);

        if (need_wakeup)
            thread_wakeup((event_t)&vm_page_free_wanted);

        if (wait_result == THREAD_WAITING) {
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
                           vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
            wait_result = thread_block(THREAD_CONTINUE_NULL);
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
        }

        return (wait_result == THREAD_AWAKENED);
    } else {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
}
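/*
 * Privileged waiters block on &vm_page_free_wanted_privileged while
 * ordinary waiters block on &vm_page_free_count, so vm_page_release()
 * can service the pageout path first without waking every waiter.  A
 * caller simply retries its allocation when this returns TRUE and gives
 * up when it returns FALSE (the wait was interrupted).
 */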
/*
 *  vm_page_alloc:
 *
 *  Allocate and return a memory cell associated
 *  with this VM object/offset pair.
 *
 *  Object must be locked.
 */

vm_page_t
vm_page_alloc(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}

vm_page_t
vm_page_alloclo(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grablo();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}


/*
 *  vm_page_alloc_guard:
 *
 *  Allocate a fictitious page which will be used
 *  as a guard page.  The page will be inserted into
 *  the object and returned to the caller.
 */

vm_page_t
vm_page_alloc_guard(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab_guard();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return (mem);
}

counter(unsigned int c_laundry_pages_freed = 0;)
/*
 *  vm_page_free_prepare:
 *
 *  Removes page from any queue it may be on
 *  and disassociates it from its VM object.
 *
 *  Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
    vm_page_t   mem)
{
    vm_page_free_prepare_queues(mem);
    vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
    vm_page_t   mem)
{
    VM_PAGE_CHECK(mem);

    assert(!mem->free);
    assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
    if (mem->free)
        panic("vm_page_free: freeing page on free list\n");
#endif  /* MACH_ASSERT || DEBUG */
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    }
    if (mem->laundry) {
        /*
         * We may have to free a page while it's being laundered
         * if we lost its pager (due to a forced unmount, for example).
         * We need to call vm_pageout_steal_laundry() before removing
         * the page from its VM object, so that we can remove it
         * from its pageout queue and adjust the laundry accounting
         */
        vm_pageout_steal_laundry(mem, TRUE);
        counter(++c_laundry_pages_freed);
    }

    VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */

    if (VM_PAGE_WIRED(mem)) {
        if (mem->object) {
            assert(mem->object->wired_page_count > 0);
            mem->object->wired_page_count--;
            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);

            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                OSAddAtomic(+1, &vm_page_purgeable_count);
                assert(vm_page_purgeable_wired_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_wired_count);
            }
            if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
                 mem->object->purgable == VM_PURGABLE_EMPTY) &&
                mem->object->vo_purgeable_owner != TASK_NULL) {
                task_t  owner;

                owner = mem->object->vo_purgeable_owner;
                /*
                 * While wired, this page was accounted
                 * as "non-volatile" but it should now
                 * be accounted as "volatile".
                 */
                /* one less "non-volatile"... */
                ledger_debit(owner->ledger,
                             task_ledgers.purgeable_nonvolatile,
                             PAGE_SIZE);
                /* ... and "phys_footprint" */
                ledger_debit(owner->ledger,
                             task_ledgers.phys_footprint,
                             PAGE_SIZE);
                /* one more "volatile" */
                ledger_credit(owner->ledger,
                              task_ledgers.purgeable_volatile,
                              PAGE_SIZE);
            }
        }
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        mem->wire_count = 0;
        assert(!mem->gobbled);
    } else if (mem->gobbled) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
    }
}
void
vm_page_free_prepare_object(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    if (mem->tabled)
        vm_page_remove(mem, remove_from_hash);  /* clears tabled, object, offset */

    PAGE_WAKEUP(mem);       /* clears wanted */

    if (mem->private) {
        mem->private = FALSE;
        mem->fictitious = TRUE;
        mem->phys_page = vm_page_fictitious_addr;
    }
    if ( !mem->fictitious) {
        vm_page_init(mem, mem->phys_page, mem->lopage);
    }
}
/*
 *  vm_page_free:
 *
 *  Returns the given page to the free list,
 *  disassociating it from any VM object.
 *
 *  Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
    vm_page_t   mem)
{
    vm_page_free_prepare(mem);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}


void
vm_page_free_unlocked(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    vm_page_lockspin_queues();
    vm_page_free_prepare_queues(mem);
    vm_page_unlock_queues();

    vm_page_free_prepare_object(mem, remove_from_hash);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
/*
 *  Free a list of pages.  The list can be up to several hundred pages,
 *  as blocked up by vm_pageout_scan().
 *  The big win is not having to take the free list lock once per page.
 */
void
vm_page_free_list(
    vm_page_t   freeq,
    boolean_t   prepare_object)
{
    vm_page_t   mem;
    vm_page_t   nxt;
    vm_page_t   local_freeq;
    int         pg_count;

    while (freeq) {

        pg_count = 0;
        local_freeq = VM_PAGE_NULL;
        mem = freeq;

        /*
         * break up the processing into smaller chunks so
         * that we can 'pipeline' the pages onto the
         * free list w/o introducing too much
         * contention on the global free queue lock
         */
        while (mem && pg_count < 64) {

            assert(!mem->inactive);
            assert(!mem->active);
            assert(!mem->throttled);
            assert(!mem->speculative);
            assert(!VM_PAGE_WIRED(mem));
            assert(mem->pageq.prev == NULL);

            nxt = (vm_page_t)(mem->pageq.next);

            if (vm_page_free_verify && !mem->fictitious && !mem->private) {
                assert(pmap_verify_free(mem->phys_page));
            }
            if (prepare_object == TRUE)
                vm_page_free_prepare_object(mem, TRUE);

            if (!mem->fictitious) {

                if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
                    vm_lopage_free_count < vm_lopage_free_limit &&
                    mem->phys_page < max_valid_low_ppnum) {
                    mem->pageq.next = NULL;
                    vm_page_release(mem);
                } else {
                    /*
                     * IMPORTANT: we can't set the page "free" here
                     * because that would make the page eligible for
                     * a physically-contiguous allocation (see
                     * vm_page_find_contiguous()) right away (we don't
                     * hold the vm_page_queue_free lock).  That would
                     * cause trouble because the page is not actually
                     * in the free queue yet...
                     */
                    mem->pageq.next = (queue_entry_t)local_freeq;
                    local_freeq = mem;
                    pg_count++;

                    pmap_clear_noencrypt(mem->phys_page);
                }
            } else {
                assert(mem->phys_page == vm_page_fictitious_addr ||
                       mem->phys_page == vm_page_guard_addr);
                vm_page_release_fictitious(mem);
            }
            mem = nxt;
        }
        freeq = mem;

        if ( (mem = local_freeq) ) {
            unsigned int    avail_free_count;
            unsigned int    need_wakeup = 0;
            unsigned int    need_priv_wakeup = 0;

            lck_mtx_lock_spin(&vm_page_queue_free_lock);

            while (mem) {
                int     color;

                nxt = (vm_page_t)(mem->pageq.next);

                mem->free = TRUE;

                color = mem->phys_page & vm_color_mask;
                queue_enter_first(&vm_page_queue_free[color],
                                  mem,
                                  vm_page_t,
                                  pageq);
                mem = nxt;
            }
            vm_page_free_count += pg_count;
            avail_free_count = vm_page_free_count;

            if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

                if (avail_free_count < vm_page_free_wanted_privileged) {
                    need_priv_wakeup = avail_free_count;
                    vm_page_free_wanted_privileged -= avail_free_count;
                    avail_free_count = 0;
                } else {
                    need_priv_wakeup = vm_page_free_wanted_privileged;
                    vm_page_free_wanted_privileged = 0;
                    avail_free_count -= need_priv_wakeup;
                }
            }
            if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
                unsigned int    available_pages;

                available_pages = avail_free_count - vm_page_free_reserved;

                if (available_pages >= vm_page_free_wanted) {
                    need_wakeup = vm_page_free_wanted;
                    vm_page_free_wanted = 0;
                } else {
                    need_wakeup = available_pages;
                    vm_page_free_wanted -= available_pages;
                }
            }
            lck_mtx_unlock(&vm_page_queue_free_lock);

            if (need_priv_wakeup != 0) {
                /*
                 * There shouldn't be that many VM-privileged threads,
                 * so let's wake them all up, even if we don't quite
                 * have enough pages to satisfy them all.
                 */
                thread_wakeup((event_t)&vm_page_free_wanted_privileged);
            }
            if (need_wakeup != 0 && vm_page_free_wanted == 0) {
                /*
                 * We don't expect to have any more waiters
                 * after this, so let's wake them all up at
                 * once.
                 */
                thread_wakeup((event_t) &vm_page_free_count);
            } else for (; need_wakeup != 0; need_wakeup--) {
                /*
                 * Wake up one waiter per page we just released.
                 */
                thread_wakeup_one((event_t) &vm_page_free_count);
            }

            VM_CHECK_MEMORYSTATUS;
        }
    }
}
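/*
 * The list handed to vm_page_free_list() is chained through pageq.next,
 * the same field used for the per-cpu free magazines, which is why each
 * page's pageq.prev is asserted NULL on entry.  Processing in chunks of
 * 64 keeps the hold time on vm_page_queue_free_lock short.  A sketch of
 * the caller side (illustrative, not from this file):
 *
 *      page->pageq.next = (queue_entry_t) local_list;
 *      local_list = page;
 *      if (++local_count >= batch_size) {
 *              vm_page_free_list(local_list, TRUE);
 *              local_list = VM_PAGE_NULL;
 *              local_count = 0;
 *      }
 */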
/*
 *  vm_page_wire:
 *
 *  Mark this page as wired down by yet
 *  another map, removing it from paging queues
 *  as necessary.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_wire(
    register vm_page_t  mem)
{

//  dbgLog(current_thread(), mem->offset, mem->object, 1);  /* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    } else {
        /*
         * In theory, the page should be in an object before it
         * gets wired, since we need to hold the object lock
         * to update some fields in the page structure.
         * However, some code (i386 pmap, for example) might want
         * to wire a page before it gets inserted into an object.
         * That's somewhat OK, as long as nobody else can get to
         * that page and update it at the same time.
         */
    }
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if ( !VM_PAGE_WIRED(mem)) {

        if (mem->pageout_queue) {
            mem->pageout = FALSE;
            vm_pageout_throttle_up(mem);
        }
        VM_PAGE_QUEUES_REMOVE(mem);

        if (mem->object) {
            mem->object->wired_page_count++;
            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);
            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                assert(vm_page_purgeable_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_count);
                OSAddAtomic(1, &vm_page_purgeable_wired_count);
            }
            if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
                 mem->object->purgable == VM_PURGABLE_EMPTY) &&
                mem->object->vo_purgeable_owner != TASK_NULL) {
                task_t  owner;

                owner = mem->object->vo_purgeable_owner;
                /* less volatile bytes */
                ledger_debit(owner->ledger,
                             task_ledgers.purgeable_volatile,
                             PAGE_SIZE);
                /* more not-quite-volatile bytes */
                ledger_credit(owner->ledger,
                              task_ledgers.purgeable_nonvolatile,
                              PAGE_SIZE);
                /* more footprint */
                ledger_credit(owner->ledger,
                              task_ledgers.phys_footprint,
                              PAGE_SIZE);
            }
            if (mem->object->all_reusable) {
                /*
                 * Wired pages are not counted as "re-usable"
                 * in "all_reusable" VM objects, so nothing
                 * to do here.
                 */
            } else if (mem->reusable) {
                /*
                 * This page is not "re-usable" when it's
                 * wired, so adjust its state and the
                 * accounting.
                 */
                vm_object_reuse_pages(mem->object,
                                      mem->offset,
                                      mem->offset+PAGE_SIZE_64,
                                      FALSE);
            }
        }
        assert(!mem->reusable);

        if (!mem->private && !mem->fictitious && !mem->gobbled)
            vm_page_wire_count++;
        if (mem->gobbled)
            vm_page_gobble_count--;
        mem->gobbled = FALSE;

        VM_CHECK_MEMORYSTATUS;

        /*
         * The page could be encrypted, but
         * We don't have to decrypt it here
         * because we don't guarantee that the
         * data is actually valid at this point.
         * The page will get decrypted in
         * vm_fault_wire() if needed.
         */
    }
    assert(!mem->gobbled);
    mem->wire_count++;
    VM_PAGE_CHECK(mem);
}
/*
 *  vm_page_gobble:
 *
 *  Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *  Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
    register vm_page_t  mem)
{
    vm_page_lockspin_queues();
    VM_PAGE_CHECK(mem);

    assert(!mem->gobbled);
    assert( !VM_PAGE_WIRED(mem));

    if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count++;
    }
    vm_page_gobble_count++;
    mem->gobbled = TRUE;
    vm_page_unlock_queues();
}
/*
 *  vm_page_unwire:
 *
 *  Release one wiring of this page, potentially
 *  enabling it to be paged again.
 *
 *  The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
    vm_page_t   mem,
    boolean_t   queueit)
{

//  dbgLog(current_thread(), mem->offset, mem->object, 0);  /* (TEST/DEBUG) */

    VM_PAGE_CHECK(mem);
    assert(VM_PAGE_WIRED(mem));
    assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
    vm_object_lock_assert_exclusive(mem->object);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if (--mem->wire_count == 0) {
        assert(!mem->private && !mem->fictitious);
        vm_page_wire_count--;
        assert(mem->object->wired_page_count > 0);
        mem->object->wired_page_count--;
        assert(mem->object->resident_page_count >=
               mem->object->wired_page_count);
        if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
            OSAddAtomic(+1, &vm_page_purgeable_count);
            assert(vm_page_purgeable_wired_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_wired_count);
        }
        if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
             mem->object->purgable == VM_PURGABLE_EMPTY) &&
            mem->object->vo_purgeable_owner != TASK_NULL) {
            task_t  owner;

            owner = mem->object->vo_purgeable_owner;
            /* more volatile bytes */
            ledger_credit(owner->ledger,
                          task_ledgers.purgeable_volatile,
                          PAGE_SIZE);
            /* less not-quite-volatile bytes */
            ledger_debit(owner->ledger,
                         task_ledgers.purgeable_nonvolatile,
                         PAGE_SIZE);
            /* less footprint */
            ledger_debit(owner->ledger,
                         task_ledgers.phys_footprint,
                         PAGE_SIZE);
        }
        assert(mem->object != kernel_object);
        assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

        if (queueit == TRUE) {
            if (mem->object->purgable == VM_PURGABLE_EMPTY) {
                vm_page_deactivate(mem);
            } else {
                vm_page_activate(mem);
            }
        }

        VM_CHECK_MEMORYSTATUS;
    }
    VM_PAGE_CHECK(mem);
}
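/*
 * vm_page_wire() and vm_page_unwire() keep three sets of counters in
 * step: the per-object wired_page_count, the global vm_page_wire_count,
 * and, for volatile purgeable objects, the owning task's ledgers.  The
 * ledger moves here mirror the ones in vm_page_wire(): wiring converts
 * "volatile" bytes into "nonvolatile" plus "phys_footprint", unwiring
 * converts them back, one PAGE_SIZE at a time.
 */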
/*
 *  vm_page_deactivate:
 *
 *  Returns the given page to the inactive list,
 *  indicating that no physical maps have access
 *  to this page.  [Used by the physical mapping system.]
 *
 *  The page queues must be locked.
 */
void
vm_page_deactivate(
    vm_page_t   m)
{
    vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
    vm_page_t   m,
    boolean_t   clear_hw_reference)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

//  dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);  /* (TEST/DEBUG) */
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    /*
     *  This page is no longer very interesting.  If it was
     *  interesting (active or inactive/referenced), then we
     *  clear the reference bit and (re)enter it in the
     *  inactive queue.  Note wired pages should not have
     *  their reference bit cleared.
     */
    assert ( !(m->absent && !m->unusual));

    if (m->gobbled) {       /* can this happen? */
        assert( !VM_PAGE_WIRED(m));

        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
        return;

    if (!m->absent && clear_hw_reference == TRUE)
        pmap_clear_reference(m->phys_page);

    m->reference = FALSE;
    m->no_cache = FALSE;

    if (!m->inactive) {
        VM_PAGE_QUEUES_REMOVE(m);

        if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
            m->dirty && m->object->internal &&
            (m->object->purgable == VM_PURGABLE_DENY ||
             m->object->purgable == VM_PURGABLE_NONVOLATILE ||
             m->object->purgable == VM_PURGABLE_VOLATILE)) {
            queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
            m->throttled = TRUE;
            vm_page_throttled_count++;
        } else {
            if (m->object->named && m->object->ref_count == 1) {
                vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
                vm_page_speculative_recreated++;
#endif
            } else {
                VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
            }
        }
    }
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    if (m->gobbled) {
        assert( !VM_PAGE_WIRED(m));
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
        return;

    VM_PAGE_QUEUES_REMOVE(m);

    queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
    m->clean_queue = TRUE;
    vm_page_cleaned_count++;

    m->inactive = TRUE;
    vm_page_inactive_count++;
    if (m->object->internal) {
        vm_page_pageable_internal_count++;
    } else {
        vm_page_pageable_external_count++;
    }

    vm_pageout_enqueued_cleaned++;
}
/*
 *  vm_page_activate:
 *
 *  Put the specified page on the active list (if appropriate).
 *
 *  The page queues must be locked.
 */

void
vm_page_activate(
    register vm_page_t  m)
{
    VM_PAGE_CHECK(m);
#ifdef  FIXME_4778297
    assert(m->object != kernel_object);
#endif
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    if (m->gobbled) {
        assert( !VM_PAGE_WIRED(m));
        if (!m->private && !m->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
        m->gobbled = FALSE;
    }
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
        return;

#if DEBUG
    if (m->active)
        panic("vm_page_activate: already active");
#endif

    if (m->speculative) {
        DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
        DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
    }

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {

        if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
            m->dirty && m->object->internal &&
            (m->object->purgable == VM_PURGABLE_DENY ||
             m->object->purgable == VM_PURGABLE_NONVOLATILE ||
             m->object->purgable == VM_PURGABLE_VOLATILE)) {
            queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
            m->throttled = TRUE;
            vm_page_throttled_count++;
        } else {
            queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
            m->active = TRUE;
            vm_page_active_count++;
            if (m->object->internal) {
                vm_page_pageable_internal_count++;
            } else {
                vm_page_pageable_external_count++;
            }
        }
        m->reference = TRUE;
        m->no_cache = FALSE;
    }
    VM_PAGE_CHECK(m);
}
/*
 *  vm_page_speculate:
 *
 *  Put the specified page on the speculative list (if appropriate).
 *
 *  The page queues must be locked.
 */
void
vm_page_speculate(
    vm_page_t   m,
    boolean_t   new)
{
    struct vm_speculative_age_q *aq;

    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    assert( !(m->absent && !m->unusual));

    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
        return;

    VM_PAGE_QUEUES_REMOVE(m);

    if ( !VM_PAGE_WIRED(m)) {
        mach_timespec_t ts;
        clock_sec_t     sec;
        clock_nsec_t    nsec;

        clock_get_system_nanotime(&sec, &nsec);
        ts.tv_sec = (unsigned int) sec;
        ts.tv_nsec = nsec;

        if (vm_page_speculative_count == 0) {

            speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
            speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

            aq = &vm_page_queue_speculative[speculative_age_index];

            /*
             * set the timer to begin a new group
             */
            aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
            aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

            ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
        } else {
            aq = &vm_page_queue_speculative[speculative_age_index];

            if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

                speculative_age_index++;

                if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                    speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                if (speculative_age_index == speculative_steal_index) {
                    speculative_steal_index = speculative_age_index + 1;

                    if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
                        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
                }
                aq = &vm_page_queue_speculative[speculative_age_index];

                if (!queue_empty(&aq->age_q))
                    vm_page_speculate_ageit(aq);

                aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
                aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

                ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
            }
        }
        enqueue_tail(&aq->age_q, &m->pageq);
        m->speculative = TRUE;
        vm_page_speculative_count++;
        if (m->object->internal) {
            vm_page_pageable_internal_count++;
        } else {
            vm_page_pageable_external_count++;
        }

        if (new == TRUE) {
            vm_object_lock_assert_exclusive(m->object);

            m->object->pages_created++;
#if DEVELOPMENT || DEBUG
            vm_page_speculative_created++;
#endif
        }
    }
    VM_PAGE_CHECK(m);
}
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *  The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
    struct vm_speculative_age_q *sq;
    vm_page_t   t;

    sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

    if (queue_empty(&sq->age_q)) {
        sq->age_q.next = aq->age_q.next;
        sq->age_q.prev = aq->age_q.prev;

        t = (vm_page_t)sq->age_q.next;
        t->pageq.prev = &sq->age_q;

        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = &sq->age_q;
    } else {
        t = (vm_page_t)sq->age_q.prev;
        t->pageq.next = aq->age_q.next;

        t = (vm_page_t)aq->age_q.next;
        t->pageq.prev = sq->age_q.prev;

        t = (vm_page_t)aq->age_q.prev;
        t->pageq.next = &sq->age_q;

        sq->age_q.prev = aq->age_q.prev;
    }
    queue_init(&aq->age_q);
}
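/*
 * Sketch of the splice performed above (q == queue head, illustrative):
 *
 *   before:  sq: q <-> s1 <-> ... <-> sN      aq: q <-> a1 <-> ... <-> aM
 *   after:   sq: q <-> s1 <-> ... <-> sN <-> a1 <-> ... <-> aM      aq: empty
 *
 * i.e. the aging bin's pages are appended to the AGED bin in order by
 * rewiring the four boundary pageq pointers rather than re-queueing each
 * page individually.
 */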
void
vm_page_lru(
    vm_page_t   m)
{
    VM_PAGE_CHECK(m);
    assert(m->object != kernel_object);
    assert(m->phys_page != vm_page_guard_addr);

#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    /*
     * if this page is currently on the pageout queue, we can't do the
     * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
    if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
        return;

    m->no_cache = FALSE;

    VM_PAGE_QUEUES_REMOVE(m);

    VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
    vm_page_t   first_throttled, last_throttled;
    vm_page_t   first_active;
    vm_page_t   m;
    int         extra_active_count;
    int         extra_internal_count, extra_external_count;

    if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
        return;

    extra_active_count = 0;
    extra_internal_count = 0;
    extra_external_count = 0;
    vm_page_lock_queues();
    if (! queue_empty(&vm_page_queue_throttled)) {
        /*
         * Switch "throttled" pages to "active".
         */
        queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->throttled);
            assert(!m->active);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));

            extra_active_count++;
            if (m->object->internal) {
                extra_internal_count++;
            } else {
                extra_external_count++;
            }

            m->throttled = FALSE;
            m->active = TRUE;
            VM_PAGE_CHECK(m);
        }

        /*
         * Transfer the entire throttled queue to a regular LRU page queues.
         * We insert it at the head of the active queue, so that these pages
         * get re-evaluated by the LRU algorithm first, since they've been
         * completely out of it until now.
         */
        first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
        last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);
        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
        queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
        printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
        queue_init(&vm_page_queue_throttled);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += extra_active_count;
        vm_page_pageable_internal_count += extra_internal_count;
        vm_page_pageable_external_count += extra_external_count;
        vm_page_throttled_count = 0;
    }
    assert(vm_page_throttled_count == 0);
    assert(queue_empty(&vm_page_queue_throttled));
    vm_page_unlock_queues();
}
/*
 * move pages from the indicated local queue to the global active queue
 * it's OK to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
    struct vpl  *lq;
    vm_page_t   first_local, last_local;
    vm_page_t   first_active;
    vm_page_t   m;
    uint32_t    count = 0;

    if (vm_page_local_q == NULL)
        return;

    lq = &vm_page_local_q[lid].vpl_un.vpl;

    if (nolocks == FALSE) {
        if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
            if ( !vm_page_trylockspin_queues())
                return;
        } else
            vm_page_lockspin_queues();

        VPL_LOCK(&lq->vpl_lock);
    }
    if (lq->vpl_count) {
        /*
         * Switch "local" pages to "active".
         */
        assert(!queue_empty(&lq->vpl_queue));

        queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
            VM_PAGE_CHECK(m);
            assert(m->local);
            assert(!m->inactive);
            assert(!m->speculative);
            assert(!VM_PAGE_WIRED(m));
            assert(!m->throttled);
            assert(!m->fictitious);

            if (m->local_id != lid)
                panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

            m->local_id = 0;
            m->local = FALSE;
            m->active = TRUE;
            VM_PAGE_CHECK(m);

            count++;
        }
        if (count != lq->vpl_count)
            panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

        /*
         * Transfer the entire local queue to a regular LRU page queues.
         */
        first_local = (vm_page_t) queue_first(&lq->vpl_queue);
        last_local = (vm_page_t) queue_last(&lq->vpl_queue);
        first_active = (vm_page_t) queue_first(&vm_page_queue_active);

        if (queue_empty(&vm_page_queue_active)) {
            queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
        } else {
            queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
        }
        queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
        queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
        queue_next(&last_local->pageq) = (queue_entry_t) first_active;

        queue_init(&lq->vpl_queue);
        /*
         * Adjust the global page counts.
         */
        vm_page_active_count += lq->vpl_count;
        vm_page_pageable_internal_count += lq->vpl_internal_count;
        vm_page_pageable_external_count += lq->vpl_external_count;
        lq->vpl_count = 0;
        lq->vpl_internal_count = 0;
        lq->vpl_external_count = 0;
    }
    assert(queue_empty(&lq->vpl_queue));

    if (nolocks == FALSE) {
        VPL_UNLOCK(&lq->vpl_lock);
        vm_page_unlock_queues();
    }
}
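/*
 * Each per-cpu "local" queue (vm_page_local_q[lid].vpl_un.vpl) collects
 * newly activated pages so that the global active-queue lock is not
 * taken on every activation; this routine drains one of those queues
 * back into the global active list, either when it crosses its hard
 * limit (force) or, with nolocks == TRUE on the hibernate path, where
 * the caller is assumed to have made the locks unnecessary.
 */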
/*
 *  vm_page_part_zero_fill:
 *
 *  Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
    vm_page_t   m,
    vm_offset_t m_pa,
    vm_size_t   len)
{

#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
    pmap_zero_part_page(m->phys_page, m_pa, len);
#else
    vm_page_t   tmp;

    while (1) {
        tmp = vm_page_grab();
        if (tmp == VM_PAGE_NULL) {
            vm_page_wait(THREAD_UNINT);
            continue;
        }
        break;
    }
    vm_page_zero_fill(tmp);
    if (m_pa != 0) {
        vm_page_part_copy(m, 0, tmp, 0, m_pa);
    }
    if ((m_pa + len) < PAGE_SIZE) {
        vm_page_part_copy(m, m_pa + len, tmp,
                          m_pa + len, PAGE_SIZE - (m_pa + len));
    }
    vm_page_copy(tmp, m);
    VM_PAGE_FREE(tmp);
#endif

}
/*
 *  vm_page_zero_fill:
 *
 *  Zero-fill the specified page.
 */
void
vm_page_zero_fill(
    vm_page_t   m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
        m->object, m->offset, m, 0,0);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(m);
#endif

//  dbgTrace(0xAEAEAEAE, m->phys_page, 0);  /* (BRINGUP) */
    pmap_zero_page(m->phys_page);
}
/*
 *  vm_page_part_copy:
 *
 *  copy part of one page to another
 */

void
vm_page_part_copy(
    vm_page_t   src_m,
    vm_offset_t src_pa,
    vm_page_t   dst_m,
    vm_offset_t dst_pa,
    vm_size_t   len)
{
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dst_m);
#endif
    pmap_copy_part_page(src_m->phys_page, src_pa,
                        dst_m->phys_page, dst_pa, len);
}
/*
 *  vm_page_copy:
 *
 *  Copy one page to another
 *
 * ENCRYPTED SWAP:
 * The source page should not be encrypted.  The caller should
 * make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
    vm_page_t   src_m,
    vm_page_t   dest_m)
{
    XPR(XPR_VM_PAGE,
        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
        src_m->object, src_m->offset,
        dest_m->object, dest_m->offset,
        0);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(src_m);
    VM_PAGE_CHECK(dest_m);
#endif
    vm_object_lock_assert_held(src_m->object);

    /*
     * ENCRYPTED SWAP:
     * The source page should not be encrypted at this point.
     * The destination page will therefore not contain encrypted
     * data after the copy.
     */
    if (src_m->encrypted) {
        panic("vm_page_copy: source page %p is encrypted\n", src_m);
    }
    dest_m->encrypted = FALSE;

    if (src_m->object != VM_OBJECT_NULL &&
        src_m->object->code_signed) {
        /*
         * We're copying a page from a code-signed object.
         * Whoever ends up mapping the copy page might care about
         * the original page's integrity, so let's validate the
         * source page now.
         */
        vm_page_copy_cs_validations++;
        vm_page_validate_cs(src_m);
    }

    if (vm_page_is_slideable(src_m)) {
        boolean_t was_busy = src_m->busy;
        src_m->busy = TRUE;
        (void) vm_page_slide(src_m, 0);
        assert(src_m->busy);
        if (!was_busy) {
            PAGE_WAKEUP_DONE(src_m);
        }
    }

    /*
     * Propagate the cs_tainted bit to the copy page. Do not propagate
     * the cs_validated bit.
     */
    dest_m->cs_tainted = src_m->cs_tainted;
    if (dest_m->cs_tainted) {
        vm_page_copy_cs_tainted++;
    }
    dest_m->slid = src_m->slid;
    dest_m->error = src_m->error; /* sliding src_m might have failed... */
    pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
#if MACH_ASSERT
static void
_vm_page_print(
    vm_page_t   p)
{
    printf("vm_page %p: \n", p);
    printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
    printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
    printf("  next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
    printf("  object=%p offset=0x%llx\n", p->object, p->offset);
    printf("  wire_count=%u\n", p->wire_count);

    printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
           (p->local ? "" : "!"),
           (p->inactive ? "" : "!"),
           (p->active ? "" : "!"),
           (p->pageout_queue ? "" : "!"),
           (p->speculative ? "" : "!"),
           (p->laundry ? "" : "!"));
    printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
           (p->free ? "" : "!"),
           (p->reference ? "" : "!"),
           (p->gobbled ? "" : "!"),
           (p->private ? "" : "!"),
           (p->throttled ? "" : "!"));
    printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
           (p->busy ? "" : "!"),
           (p->wanted ? "" : "!"),
           (p->tabled ? "" : "!"),
           (p->fictitious ? "" : "!"),
           (p->pmapped ? "" : "!"),
           (p->wpmapped ? "" : "!"));
    printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
           (p->pageout ? "" : "!"),
           (p->absent ? "" : "!"),
           (p->error ? "" : "!"),
           (p->dirty ? "" : "!"),
           (p->cleaning ? "" : "!"),
           (p->precious ? "" : "!"),
           (p->clustered ? "" : "!"));
    printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
           (p->overwriting ? "" : "!"),
           (p->restart ? "" : "!"),
           (p->unusual ? "" : "!"),
           (p->encrypted ? "" : "!"),
           (p->encrypted_cleaning ? "" : "!"));
    printf("  %scs_validated, %scs_tainted, %sno_cache\n",
           (p->cs_validated ? "" : "!"),
           (p->cs_tainted ? "" : "!"),
           (p->no_cache ? "" : "!"));

    printf("phys_page=0x%x\n", p->phys_page);
}
/*
 *  Check that the list of pages is ordered by
 *  ascending physical address and has no holes.
 */
static unsigned int
vm_page_verify_contiguous(
    vm_page_t       pages,
    unsigned int    npages)
{
    register vm_page_t  m;
    unsigned int        page_count;
    vm_offset_t         prev_addr;

    prev_addr = pages->phys_page;
    page_count = 1;
    for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
        if (m->phys_page != prev_addr + 1) {
            printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
                   m, (long)prev_addr, m->phys_page);
            printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
            panic("vm_page_verify_contiguous: not contiguous!");
        }
        prev_addr = m->phys_page;
        ++page_count;
    }
    if (page_count != npages) {
        printf("pages %p actual count 0x%x but requested 0x%x\n",
               pages, page_count, npages);
        panic("vm_page_verify_contiguous: count error");
    }
    return 1;
}
/*
 *  Check the free lists for proper length etc.
 */
static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
static unsigned int
vm_page_verify_free_list(
    queue_head_t    *vm_page_queue,
    unsigned int    color,
    vm_page_t       look_for_page,
    boolean_t       expect_page)
{
    unsigned int    npages;
    vm_page_t       m;
    vm_page_t       prev_m;
    boolean_t       found_page;

    if (! vm_page_verify_this_free_list_enabled)
        return 0;

    found_page = FALSE;
    npages = 0;
    prev_m = (vm_page_t) vm_page_queue;
    queue_iterate(vm_page_queue,
                  m,
                  vm_page_t,
                  pageq) {

        if (m == look_for_page) {
            found_page = TRUE;
        }
        if ((vm_page_t) m->pageq.prev != prev_m)
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
                  color, npages, m, m->pageq.prev, prev_m);
        if ( !m->busy )
            panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
                  color, npages, m);
        if (color != (unsigned int) -1) {
            if ((m->phys_page & vm_color_mask) != color)
                panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
                      color, npages, m, m->phys_page & vm_color_mask, color);
            if ( !m->free )
                panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
                      color, npages, m);
        }
        ++npages;
        prev_m = m;
    }
    if (look_for_page != VM_PAGE_NULL) {
        unsigned int other_color;

        if (expect_page && !found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
            _vm_page_print(look_for_page);
            for (other_color = 0;
                 other_color < vm_colors;
                 other_color++) {
                if (other_color == color)
                    continue;
                vm_page_verify_free_list(&vm_page_queue_free[other_color],
                                         other_color, look_for_page, FALSE);
            }
            if (color == (unsigned int) -1) {
                vm_page_verify_free_list(&vm_lopage_queue_free,
                                         (unsigned int) -1, look_for_page, FALSE);
            }
            panic("vm_page_verify_free_list(color=%u)\n", color);
        }
        if (!expect_page && found_page) {
            printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
                   color, npages, look_for_page, look_for_page->phys_page);
        }
    }
    return npages;
}
static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
    unsigned int    color, npages, nlopages;
    boolean_t       toggle = TRUE;

    if (! vm_page_verify_all_free_lists_enabled)
        return;

    npages = 0;

    lck_mtx_lock(&vm_page_queue_free_lock);

    if (vm_page_verify_this_free_list_enabled == TRUE) {
        /*
         * This variable has been set globally for extra checking of
         * each free list Q. Since we didn't set it, we don't own it
         * and we shouldn't toggle it.
         */
        toggle = FALSE;
    }

    if (toggle == TRUE) {
        vm_page_verify_this_free_list_enabled = TRUE;
    }

    for( color = 0; color < vm_colors; color++ ) {
        npages += vm_page_verify_free_list(&vm_page_queue_free[color],
                                           color, VM_PAGE_NULL, FALSE);
    }
    nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
                                        (unsigned int) -1,
                                        VM_PAGE_NULL, FALSE);
    if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
        panic("vm_page_verify_free_lists: "
              "npages %u free_count %d nlopages %u lo_free_count %u",
              npages, vm_page_free_count, nlopages, vm_lopage_free_count);

    if (toggle == TRUE) {
        vm_page_verify_this_free_list_enabled = FALSE;
    }

    lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
    vm_page_t   mem,
    int         val)
{
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
    if (mem->free + mem->active + mem->inactive + mem->speculative +
        mem->throttled + mem->pageout_queue > (val)) {
        _vm_page_print(mem);
        panic("vm_page_queues_assert(%p, %d)\n", mem, val);
    }
    if (VM_PAGE_WIRED(mem)) {
        assert(!mem->active);
        assert(!mem->inactive);
        assert(!mem->speculative);
        assert(!mem->throttled);
        assert(!mem->pageout_queue);
    }
}
#endif  /* MACH_ASSERT */
/*
 *  CONTIGUOUS PAGE ALLOCATION
 *
 *  Find a region large enough to contain at least n pages
 *  of contiguous physical memory.
 *
 *  This is done by traversing the vm_page_t array in a linear fashion.
 *  We assume that the vm_page_t array has the available physical pages in an
 *  ordered, ascending list... this is currently true of all our implementations
 *  and must remain so... there can be 'holes' in the array... we also can
 *  no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *  which used to happen via 'vm_page_convert'... that function was no longer
 *  being called and was removed...
 *
 *  The basic flow consists of stabilizing some of the interesting state of
 *  a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *  sweep at the beginning of the array looking for pages that meet our criteria
 *  for a 'stealable' page... currently we are pretty conservative... if the page
 *  meets these criteria and is physically contiguous to the previous page in the 'run'
 *  we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *  and start to develop a new run... if at this point we've already considered
 *  at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *  and mutex_pause (which will yield the processor), to keep the latency low w/r
 *  to other threads trying to acquire free pages (or move pages from q to q),
 *  and then continue from the spot we left off... we only make 1 pass through the
 *  array.  Once we have a 'run' that is long enough, we'll go into the loop
 *  which steals the pages from the queues they're currently on... pages on the free
 *  queue can be stolen directly... pages that are on any of the other queues
 *  must be removed from the object they are tabled on... this requires taking the
 *  object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *  or if the state of the page behind the vm_object lock is no longer viable, we'll
 *  dump the pages we've currently stolen back to the free list, and pick up our
 *  scan from the point where we aborted the 'current' run.
 *
 *  Requirements:
 *      - neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *  Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define MAX_CONSIDERED_BEFORE_YIELD     1000


#define RESET_STATE_OF_RUN()    \
    MACRO_BEGIN                 \
    prevcontaddr = -2;          \
    start_pnum = -1;            \
    free_considered = 0;        \
    substitute_needed = 0;      \
    npages = 0;                 \
    MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
#if DEBUG
int vm_page_find_contig_debug = 0;
#endif
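/*
 * RESET_STATE_OF_RUN() (above) is what makes the scan restartable: it
 * clears the bookkeeping for the current candidate run (the previous
 * contiguous physical address, the count of free pages considered, the
 * count of pages that would need substitution, and the run length) so
 * that the next page examined starts a fresh run.  A worked example of
 * the alignment check this supports: with pnum_mask == 3 (a 4-page
 * alignment request), a run may only begin on a page whose phys_page
 * has its two low bits clear.
 */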
static vm_page_t
vm_page_find_contiguous(
    unsigned int    contig_pages,
    ppnum_t         max_pnum,
    ppnum_t         pnum_mask,
    boolean_t       wire,
    int             flags)
{
    vm_page_t       m = NULL;
    ppnum_t         prevcontaddr;
    ppnum_t         start_pnum = 0;
    unsigned int    npages, considered, scanned;
    unsigned int    page_idx, start_idx, last_idx, orig_last_idx;
    unsigned int    idx_last_contig_page_found = 0;
    int             free_considered, free_available;
    int             substitute_needed;
    boolean_t       wrapped;
#if DEBUG
    clock_sec_t     tv_start_sec, tv_end_sec;
    clock_usec_t    tv_start_usec, tv_end_usec;
#endif
#if MACH_ASSERT
    int             yielded = 0;
    int             dumped_run = 0;
    int             stolen_pages = 0;
    int             compressed_pages = 0;
#endif

    if (contig_pages == 0)
        return VM_PAGE_NULL;

#if MACH_ASSERT
    vm_page_verify_free_lists();
#endif
#if DEBUG
    clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
#endif
    PAGE_REPLACEMENT_ALLOWED(TRUE);

    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);

    RESET_STATE_OF_RUN();

    scanned = 0;
    considered = 0;
    free_available = vm_page_free_count - vm_page_free_reserved;

    wrapped = FALSE;

    if(flags & KMA_LOMEM)
        idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
    else
        idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

    orig_last_idx = idx_last_contig_page_found;
    last_idx = orig_last_idx;

    for (page_idx = last_idx, start_idx = last_idx;
         npages < contig_pages && page_idx < vm_pages_count;
         page_idx++) {
retry:
        if (wrapped &&
            npages == 0 &&
            page_idx >= orig_last_idx) {
            /*
             * We're back where we started and we haven't
             * found any suitable contiguous range.  Let's
             * give up.
             */
            break;
        }
        scanned++;
        m = &vm_pages[page_idx];

        assert(!m->fictitious);
        assert(!m->private);

        if (max_pnum && m->phys_page > max_pnum) {
            /* no more low pages... */
            break;
        }
        if (!npages & ((m->phys_page & pnum_mask) != 0)) {
            /*
             * not aligned
             */
            RESET_STATE_OF_RUN();

        } else if (VM_PAGE_WIRED(m) || m->gobbled ||
                   m->encrypted_cleaning ||
                   m->pageout_queue || m->laundry || m->wanted ||
                   m->cleaning || m->overwriting || m->pageout) {
            /*
             * page is in a transient state
             * or a state we don't want to deal
             * with, so don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
            /*
             * page needs to be on one of our queues
             * or it needs to belong to the compressor pool
             * in order for it to be stable behind the
             * locks we hold at this point...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && (!m->tabled || m->busy)) {
            /*
             * pages on the free list are always 'busy'
             * so we couldn't test for 'busy' in the check
             * for the transient states... pages that are
             * 'free' are never 'tabled', so we also couldn't
             * test for 'tabled'.  So we check here to make
             * sure that a non-free page is not busy and is
             * tabled on an object...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else {
            if (m->phys_page != prevcontaddr + 1) {
                if ((m->phys_page & pnum_mask) != 0) {
                    RESET_STATE_OF_RUN();
                    goto did_consider;
                } else {
                    npages = 1;
                    start_idx = page_idx;
                    start_pnum = m->phys_page;
                }
            } else {
                npages++;
            }
            prevcontaddr = m->phys_page;

            VM_PAGE_CHECK(m);
            if (m->free) {
                free_considered++;
            } else {
                /*
                 * This page is not free.
                 * If we can't steal used pages,
                 * we have to give up this run
                 * and keep looking.
                 * Otherwise, we might need to
                 * move the contents of this page
                 * into a substitute page.
                 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
                if (m->pmapped || m->dirty || m->precious) {
                    substitute_needed++;
                }
#else
                RESET_STATE_OF_RUN();
#endif
            }

            if ((free_considered + substitute_needed) > free_available) {
                /*
                 * if we let this run continue
                 * we will end up dropping the vm_page_free_count
                 * below the reserve limit... we need to abort
                 * this run, but we can at least re-consider this
                 * page... thus the jump back to 'retry'
                 */
                RESET_STATE_OF_RUN();

                if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
                    considered++;
                    goto retry;
                }
                /*
                 * free_available == 0
                 * so can't consider any free pages... if
                 * we went to retry in this case, we'd
                 * get stuck looking at the same page
                 * w/o making any forward progress
                 * we also want to take this path if we've already
                 * reached our limit that controls the lock latency
                 */
            }
        }
did_consider:
        if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

            PAGE_REPLACEMENT_ALLOWED(FALSE);

            lck_mtx_unlock(&vm_page_queue_free_lock);
            vm_page_unlock_queues();

            mutex_pause(0);

            PAGE_REPLACEMENT_ALLOWED(TRUE);

            vm_page_lock_queues();
            lck_mtx_lock(&vm_page_queue_free_lock);

            RESET_STATE_OF_RUN();
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;
            considered = 0;
#if MACH_ASSERT
            yielded++;
#endif
            goto retry;
        }
        considered++;
    }
    m = VM_PAGE_NULL;

    if (npages != contig_pages) {

        if (!wrapped) {
            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();
            if( flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
            last_idx = 0;
            page_idx = last_idx;
            wrapped = TRUE;
            goto retry;
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
        vm_page_t       m1;
        vm_page_t       m2;
        unsigned int    cur_idx;
        unsigned int    tmp_start_idx;
        vm_object_t     locked_object = VM_OBJECT_NULL;
        boolean_t       abort_run = FALSE;

        assert(page_idx - start_idx == contig_pages);

        tmp_start_idx = start_idx;

        /*
         * first pass through to pull the free pages
         * off of the free queue so that in case we
         * need substitute pages, we won't grab any
         * of the free pages in the run... we'll clear
         * the 'free' bit in the 2nd pass, and even in
         * an abort_run case, we'll collect all of the
         * free pages in this run and return them to the free list
         */
        while (start_idx < page_idx) {

            m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
            assert(m1->free);
#endif
            if (m1->free) {
                unsigned int color;

                color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
                vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
#endif
                queue_remove(&vm_page_queue_free[color],
                             m1,
                             vm_page_t,
                             pageq);
                m1->pageq.next = NULL;
                m1->pageq.prev = NULL;
#if MACH_ASSERT
                vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
#endif
                /*
                 * Clear the "free" bit so that this page
                 * does not get considered for another
                 * concurrent physically-contiguous allocation.
                 */
                m1->free = FALSE;

                vm_page_free_count--;
            }
        }
        if( flags & KMA_LOMEM)
            vm_page_lomem_find_contiguous_last_idx = page_idx;
        else
            vm_page_find_contiguous_last_idx = page_idx;

        /*
         * we can drop the free queue lock at this point since
         * we've pulled any 'free' candidates off of the list
         * we need it dropped so that we can do a vm_page_grab
         * when substituting for pmapped/dirty pages
         */
        lck_mtx_unlock(&vm_page_queue_free_lock);

        start_idx = tmp_start_idx;
        cur_idx = page_idx - 1;

        while (start_idx++ < page_idx) {
            /*
             * must go through the list from back to front
             * so that the page list is created in the
             * correct order - low -> high phys addresses
             */
            m1 = &vm_pages[cur_idx--];

            if (m1->object == VM_OBJECT_NULL) {
                /*
                 * page has already been removed from
                 * the free list in the 1st pass
                 */
                assert(m1->offset == (vm_object_offset_t) -1);
                assert(!m1->wanted);
                assert(!m1->laundry);
            } else {
                vm_object_t object;
                int         refmod;
                boolean_t   disconnected, reusable;

                if (abort_run == TRUE)
                    continue;

                object = m1->object;

                if (object != locked_object) {
                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    if (vm_object_lock_try(object))
                        locked_object = object;
                }
                if (locked_object == VM_OBJECT_NULL ||
                    (VM_PAGE_WIRED(m1) || m1->gobbled ||
                     m1->encrypted_cleaning ||
                     m1->pageout_queue || m1->laundry || m1->wanted ||
                     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {

                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    tmp_start_idx = cur_idx;
                    abort_run = TRUE;
                    continue;
                }

                disconnected = FALSE;
                reusable = FALSE;

                if ((m1->reusable ||
                     m1->object->all_reusable) &&
                    m1->inactive &&
                    !m1->dirty &&
                    !m1->reference) {
                    /* reusable page... */
                    refmod = pmap_disconnect(m1->phys_page);
                    disconnected = TRUE;
                    if (refmod == 0) {
                        /*
                         * ... not reused: can steal
                         * without relocating contents.
                         */
                        reusable = TRUE;
                    }
                }

                if ((m1->pmapped &&
                     ! reusable) ||
                    m1->dirty ||
                    m1->precious) {
                    vm_object_offset_t offset;

                    m2 = vm_page_grab();

                    if (m2 == VM_PAGE_NULL) {
                        if (locked_object) {
                            vm_object_unlock(locked_object);
                            locked_object = VM_OBJECT_NULL;
                        }
                        tmp_start_idx = cur_idx;
                        abort_run = TRUE;
                        continue;
                    }
                    if (! disconnected) {
                        if (m1->pmapped)
                            refmod = pmap_disconnect(m1->phys_page);
                        else
                            refmod = 0;
                    }

                    /* copy the page's contents */
                    pmap_copy_page(m1->phys_page, m2->phys_page);
                    /* copy the page's state */
                    assert(!VM_PAGE_WIRED(m1));
                    assert(!m1->pageout_queue);
                    assert(!m1->laundry);
                    m2->reference   = m1->reference;
                    assert(!m1->gobbled);
                    assert(!m1->private);
                    m2->no_cache    = m1->no_cache;
                    assert(!m1->wanted);
                    assert(!m1->fictitious);
                    m2->pmapped     = m1->pmapped; /* should flush cache ? */
                    m2->wpmapped    = m1->wpmapped;
                    assert(!m1->pageout);
                    m2->absent      = m1->absent;
                    m2->error       = m1->error;
                    m2->dirty       = m1->dirty;
                    assert(!m1->cleaning);
                    m2->precious    = m1->precious;
                    m2->clustered   = m1->clustered;
                    assert(!m1->overwriting);
                    m2->restart     = m1->restart;
                    m2->unusual     = m1->unusual;
                    m2->encrypted   = m1->encrypted;
                    assert(!m1->encrypted_cleaning);
                    m2->cs_validated = m1->cs_validated;
                    m2->cs_tainted  = m1->cs_tainted;

                    /*
                     * If m1 had really been reusable,
                     * we would have just stolen it, so
                     * let's not propagate its "reusable"
                     * bit and assert that m2 is not
                     * marked as "reusable".
                     */
                    // m2->reusable = m1->reusable;
                    assert(!m2->reusable);

                    assert(!m1->lopage);
                    m2->slid        = m1->slid;
                    m2->compressor  = m1->compressor;

                    /*
                     * page may need to be flushed if
                     * it is marshalled into a UPL
                     * that is going to be used by a device
                     * that doesn't support coherency
                     */
                    m2->written_by_kernel = TRUE;

                    /*
                     * make sure we clear the ref/mod state
                     * from the pmap layer... else we risk
                     * inheriting state from the last time
                     * this page was used...
                     */
                    pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

                    if (refmod & VM_MEM_REFERENCED)
                        m2->reference = TRUE;
                    if (refmod & VM_MEM_MODIFIED) {
                        SET_PAGE_DIRTY(m2, TRUE);
                    }
                    offset = m1->offset;

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);

                    /*
                     * now put the substitute page
                     * on the object
                     */
                    vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);

                    if (m2->compressor) {
                        m2->pmapped = TRUE;
                        m2->wpmapped = TRUE;

                        PMAP_ENTER(kernel_pmap, m2->offset, m2,
                                   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
#if MACH_ASSERT
                        compressed_pages++;
#endif
                    } else {
                        if (m2->reference)
                            vm_page_activate(m2);
                        else
                            vm_page_deactivate(m2);
                    }
                    PAGE_WAKEUP_DONE(m2);

                } else {
                    assert(!m1->compressor);

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);
                }
#if MACH_ASSERT
                stolen_pages++;
#endif
            }
            m1->pageq.next = (queue_entry_t) m;
            m1->pageq.prev = NULL;
            m = m1;
        }
        if (locked_object) {
            vm_object_unlock(locked_object);
            locked_object = VM_OBJECT_NULL;
        }

        if (abort_run == TRUE) {
            if (m != VM_PAGE_NULL) {
                vm_page_free_list(m, FALSE);
            }
#if MACH_ASSERT
            dumped_run++;
#endif
            /*
             * want the index of the last
             * page in this run that was
             * successfully 'stolen', so back
             * it up 1 for the auto-decrement on use
             * and 1 more to bump back over this page
             */
            page_idx = tmp_start_idx + 2;
            if (page_idx >= vm_pages_count) {
                page_idx = last_idx = 0;
                wrapped = TRUE;
            }
            abort_run = FALSE;

            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();

            if( flags & KMA_LOMEM
)
4624 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
4626 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
4628 last_idx
= page_idx
;
4630 lck_mtx_lock(&vm_page_queue_free_lock
);
4632 * reset our free page limit since we
4633 * dropped the lock protecting the vm_page_free_queue
4635 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4639 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
4647 vm_page_gobble_count
+= npages
;
4650 * gobbled pages are also counted as wired pages
4652 vm_page_wire_count
+= npages
;
4654 assert(vm_page_verify_contiguous(m
, npages
));
4657 PAGE_REPLACEMENT_ALLOWED(FALSE
);
4659 vm_page_unlock_queues();
4662 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
4664 tv_end_sec
-= tv_start_sec
;
4665 if (tv_end_usec
< tv_start_usec
) {
4667 tv_end_usec
+= 1000000;
4669 tv_end_usec
-= tv_start_usec
;
4670 if (tv_end_usec
>= 1000000) {
4672 tv_end_sec
-= 1000000;
4674 if (vm_page_find_contig_debug
) {
4675 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4676 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
4677 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
4678 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
);
4683 vm_page_verify_free_lists();
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
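/*
 * Usage note (descriptive, not part of the original sources): cpm_allocate()
 * expects a page-aligned size and treats the inability to find a physically
 * contiguous run as a soft failure (KERN_NO_SPACE).  The pages handed back
 * through *list are already wired (or gobbled, when wire == FALSE) and sorted
 * by ascending physical address, which is what device drivers and the
 * contiguous kmem allocator typically rely on.
 */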
unsigned int	vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original code
 */
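/*
 * Illustrative caller sketch (not lifted verbatim from any one call site;
 * the real users live in vm_pageout.c and vm_object.c): work is batched into
 * a stack-allocated vm_page_delayed_work array while only the object lock is
 * held, e.g.
 *
 *	dwp->dw_mask = DW_vm_page_free | DW_clear_busy;
 *	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 *	if (dw_count >= dw_limit) {
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 *
 * so the hot vm_page_queue_lock is only taken once per batch, below.
 */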
void
vm_page_do_delayed_work(
	vm_object_t	object,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
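/*
 * vm_page_alloc_list(), below, is only meaningful for KMA_LOMEM requests:
 * it pulls page_count pages from the low-memory pool one at a time via
 * vm_page_grablo() and strings them together through pageq.next; on a
 * shortage everything grabbed so far is returned to the free list and
 * KERN_RESOURCE_SHORTAGE is reported back to the caller.
 */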
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;

/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define	HIBERNATE_XPMAPPED_LIMIT	40000
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}


boolean_t hibernate_skip_external = FALSE;
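/*
 * hibernate_flush_queue() walks up to qcount pages of one paging queue and
 * pushes any dirty, flushable page to its pager (or, with the compressed
 * pager active, through the compressor) so the hibernation image only has to
 * preserve clean memory; pages it cannot or should not clean are re-entered
 * on the queue and counted in hibernate_stats for the HIBPRINT summary.
 */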
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
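/*
 * The pass argument above only matters with the compressed pager: pass == 1
 * brackets the active-queue flush with vm_compressor_record_warmup_start()/
 * vm_compressor_record_warmup_end() so the compressor can note which segments
 * were populated during the pre-hibernation sweep.  Any non-zero return from
 * the flush/drain helpers aborts the whole sweep and is reported back to
 * hibernate_flush_memory().
 */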
void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}
int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

			vm_compressor_flush();

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
		}
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		hibernate_stats.hibernate_considered,
		hibernate_stats.hibernate_reentered_on_q,
		hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		hibernate_stats.hibernate_skipped_cleaning,
		hibernate_stats.hibernate_skipped_transient,
		hibernate_stats.hibernate_skipped_precious,
		hibernate_stats.hibernate_skipped_external,
		hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		hibernate_stats.hibernate_queue_paused,
		hibernate_stats.hibernate_throttled,
		hibernate_stats.hibernate_throttle_timeout,
		hibernate_stats.hibernate_drained,
		hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
static void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}
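/*
 * Worked example for the masking above: a bank covering 37 pages needs two
 * 32-bit bitmap words; (37 & 31) == 5, so only the 5 most-significant bits of
 * the final word correspond to real pages (the hibernate bitmaps are filled
 * MSB-first).  0xFFFFFFFF >> 5 == 0x07FFFFFF pre-sets the 27 out-of-range
 * bits, and since a set bit means "does not need saving", the image writer
 * never tries to preserve pages past the end of the bank.
 */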
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t i;
	vm_page_t m;
	uint64_t start, end, timeout, nsec;
	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}
void
hibernate_free_gobble_pages(void)
{
	vm_page_t m, next;
	uint32_t  count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}
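/*
 * hibernate_consider_discard() decides, for one resident page, whether the
 * hibernation image can safely skip it: clean pages and pages of volatile or
 * empty purgeable objects can be re-created or dropped on wake, so they are
 * reported as discardable; anything wired, busy, precious, dirty, or
 * otherwise "in play" must be preserved.  The preflight flag suppresses the
 * statistics side effects while the image size is still being estimated.
 */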
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
	vm_object_t object = NULL;
	int			refmod_state;
	boolean_t		discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			if (!preflight) hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			if (!preflight) hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			if (!preflight) hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 * Somebody is playing with this page.
			 */
			if (!preflight) hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			if (!preflight) hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			if (!preflight) hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			if (!preflight) hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);


		if (discard == FALSE) {
			if (!preflight)
				hibernate_stats.cd_found_dirty++;
		} else if (m->xpmapped && m->reference && !object->internal) {
			if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
				if (!preflight)
					hibernate_stats.cd_found_xpmapped++;
				discard = FALSE;
			} else {
				if (!preflight)
					hibernate_stats.cd_skipped_xpmapped++;
			}
		}
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
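/*
 * hibernate_discard_page() actually drops a page that the check above judged
 * discardable: it is disconnected from the pmap and freed, and if it belonged
 * to a volatile purgeable object the whole object is moved to the EMPTY state,
 * with the purgeable token and vm_page_purgeable_count adjusted to match.
 */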
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

#if MACH_ASSERT || DEBUG
	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */
#endif	/* MACH_ASSERT || DEBUG */

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		assert(old_queue);
		if (m->object->purgeable_when_ripe) {
			vm_purgeable_token_delete_first(old_queue);
		}
		m->object->purgable = VM_PURGABLE_EMPTY;

		/*
		 * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
		 * accounted in the "volatile" ledger, so no change here.
		 * We have to update vm_page_purgeable_count, though, since we're
		 * effectively purging this object.
		 */
		unsigned int delta;
		assert(m->object->resident_page_count >= m->object->wired_page_count);
		delta = (m->object->resident_page_count - m->object->wired_page_count);
		assert(vm_page_purgeable_count >= delta);
		OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
	}

	vm_page_free(m);

#if MACH_ASSERT || DEBUG
	vm_object_unlock(object);
#endif	/* MACH_ASSERT || DEBUG */
}
/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
}

void
hibernate_vm_unlock_queues(void)
{
	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
}
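/*
 * Note the lock order used above: the compressor object, then the page
 * queues, then the free-page lock, then each per-CPU local queue.  The
 * unlock routine releases them in the reverse order, and the preflight path
 * of hibernate_page_list_setall() takes the same three global locks itself
 * so that both paths see a stable view of the page queues.
 */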
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   boolean_t preflight,
			   boolean_t will_discard,
			   uint32_t * pagesOut)
{
	uint64_t start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t pages = page_list->page_count;
	uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
	uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
	uint32_t count_wire = pages;
	uint32_t count_discard_active    = 0;
	uint32_t count_discard_inactive  = 0;
	uint32_t count_discard_cleaned   = 0;
	uint32_t count_discard_purgeable = 0;
	uint32_t count_discard_speculative = 0;
	uint32_t count_discard_vm_struct_pages = 0;
	uint32_t i;
	uint32_t             bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;
	boolean_t            discard_all;
	boolean_t            discard;

	HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);

	if (preflight) {
		page_list = NULL;
		page_list_wired = NULL;
		page_list_pal = NULL;
		discard_all = FALSE;
	} else {
		discard_all = will_discard;
	}

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		vm_page_lock_queues();
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_LOCK(&lq->vpl_lock);
			}
		}
	}
#endif  /* MACH_ASSERT || DEBUG */


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	if (!preflight) {
		hibernate_page_list_zero(page_list);
		hibernate_page_list_zero(page_list_wired);
		hibernate_page_list_zero(page_list_pal);

		hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
		hibernate_stats.cd_pages = pages;
	}

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, !preflight);
	}

	if (preflight) {
		vm_object_lock(compressor_object);
		vm_page_lock_queues();
		lck_mtx_lock(&vm_page_queue_free_lock);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
		m = (vm_page_t) m->pageq.next;
	}

	if (!preflight) for( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			if (!preflight) {
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_total_free++;
			}
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_throttled++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_anonymous++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			discard = discard_all;
		}
		else
			count_cleaned++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			discard = discard_all;
		}
		else
			count_active++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_inactive++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard)	hibernate_discard_page(m);
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			discard = FALSE;
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m, preflight))
			{
				if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
				discard = discard_all;
			}
			else
				count_speculative++;
			count_wire--;
			if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
			if (discard)	hibernate_discard_page(m);
			m = next;
		}
	}

	queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
	{
		count_compressor++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	if (preflight == FALSE && discard_all == TRUE) {
		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

		HIBLOG("hibernate_teardown started\n");
		count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
		HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

		pages -= count_discard_vm_struct_pages;
		count_wire -= count_discard_vm_struct_pages;

		hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	if (!preflight) {
		// pull wired from hibernate_bitmap
		bitmap = &page_list->bank_bitmap[0];
		bitmap_wired = &page_list_wired->bank_bitmap[0];
		for (bank = 0; bank < page_list->bank_count; bank++)
		{
			for (i = 0; i < bitmap->bitmapwords; i++)
				bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
			bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
			bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
		}
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

	if (!preflight) {
		hibernate_stats.cd_count_wire = count_wire;
		hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
					       count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
		pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
		discard_all ? "did" : "could",
		count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

	if (hibernate_stats.cd_skipped_xpmapped)
		HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

	if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl	*lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_UNLOCK(&lq->vpl_lock);
			}
		}
		vm_page_unlock_queues();
	}
#endif  /* MACH_ASSERT || DEBUG */

	if (preflight) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		vm_page_unlock_queues();
		vm_object_unlock(compressor_object);
	}

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
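/*
 * Note on the preflight accounting above: when the caller intends to discard
 * aggressively (preflight && will_discard), every pageable page that the real
 * pass would either tear down or push through the compressor is subtracted
 * from *pagesOut as well, so the estimated image size reflects the fully
 * discarded state rather than the current resident set.
 */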
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t  start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t  i;
	uint32_t  count_discard_active    = 0;
	uint32_t  count_discard_inactive  = 0;
	uint32_t  count_discard_purgeable = 0;
	uint32_t  count_discard_cleaned   = 0;
	uint32_t  count_discard_speculative = 0;


#if MACH_ASSERT || DEBUG
	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

#if MACH_ASSERT || DEBUG
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
		nsec / 1000000ULL,
		count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
boolean_t       hibernate_paddr_map_inited = FALSE;
boolean_t       hibernate_rebuild_needed = FALSE;
unsigned int    hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t       hibernate_rebuild_hash_list = NULL;

unsigned int    hibernate_teardown_found_tabled_pages = 0;
unsigned int    hibernate_teardown_found_created_pages = 0;
unsigned int    hibernate_teardown_found_free_pages = 0;
unsigned int    hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;
	unsigned int		ppnm_sindx;
	unsigned int		ppnm_eindx;
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;
void
hibernate_create_paddr_map()
{
	unsigned int	i;
	ppnum_t		next_ppnum_in_run = 0;
	struct ppnum_mapping *ppnm = NULL;

	if (hibernate_paddr_map_inited == FALSE) {

		for (i = 0; i < vm_pages_count; i++) {

			if (ppnm)
				ppnm->ppnm_eindx = i;

			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

				ppnm = kalloc(sizeof(struct ppnum_mapping));

				ppnm->ppnm_next = ppnm_head;
				ppnm_head = ppnm;

				ppnm->ppnm_sindx = i;
				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
			}
			next_ppnum_in_run = vm_pages[i].phys_page + 1;
		}
		ppnm->ppnm_eindx++;

		hibernate_paddr_map_inited = TRUE;
	}
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
	struct ppnum_mapping *ppnm = NULL;

	ppnm = ppnm_last_found;

	if (ppnm) {
		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
			goto done;
	}
	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
			ppnm_last_found = ppnm;
			break;
		}
	}
	if (ppnm == NULL)
		panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
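/*
 * Example of the translation above: for a run descriptor with
 * ppnm_sindx == 100, ppnm_eindx == 164 and ppnm_base_paddr == 0x2000,
 * vm_pages[] index 103 maps to physical page 0x2000 + (103 - 100) == 0x2003.
 * The single-entry ppnm_last_found cache makes the common case, repeated
 * lookups within the same contiguous run, effectively O(1).
 */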
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	addr64_t	saddr_aligned;
	addr64_t	eaddr_aligned;
	addr64_t	addr;
	ppnum_t		paddr;
	unsigned int	mark_as_unneeded_pages = 0;

	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
	eaddr_aligned = eaddr & ~PAGE_MASK_64;

	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

		paddr = pmap_find_phys(kernel_pmap, addr);

		assert(paddr);

		hibernate_page_bitset(page_list,       TRUE, paddr);
		hibernate_page_bitset(page_list_wired, TRUE, paddr);

		mark_as_unneeded_pages++;
	}
	return (mark_as_unneeded_pages);
}
void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;

	assert(mem->hashed);
	assert(mem->object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 * Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
void
hibernate_free_range(int sindx, int eindx)
{
	vm_page_t	mem;
	unsigned int	color;

	while (sindx < eindx) {
		mem = &vm_pages[sindx];

		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;

		sindx++;
	}
}
extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
		mem->next_m = VM_PAGE_PACK_PTR(NULL);

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->hashed)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	if (sindx)
		hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were entered in the hash,
	 * but were not located in the vm_pages arrary... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

		mem->next_m = VM_PAGE_PACK_PTR(NULL);
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem,
				     vm_page_t,
				     pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to it's new home
				 */
				vm_pages[compact_target_indx] = *mem;
				mem->free = TRUE;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}
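/*
 * hibernate_teardown_vm_structs() and hibernate_rebuild_vm_structs() form a
 * matched pair: teardown compacts vm_pages[] and empties the free queues so
 * the tail of the array, the hash buckets and the unneeded pmap range can be
 * excluded from the hibernation image, while the rebuild on wake undoes the
 * compaction using the packed "next_m" back-pointers and the
 * hibernate_teardown_last_valid_compact_indx recorded here.
 */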
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t *info,
	unsigned int count)
{
	unsigned int i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */
#if VM_PAGE_BUCKETS_CHECK
void
vm_page_buckets_check(void)
{
	unsigned int i;
	vm_page_t p;
	unsigned int p_hash;
	vm_page_bucket_t *bucket;
	lck_spin_t	*bucket_lock;

	if (!vm_page_buckets_check_ready) {
		return;
	}

#if HIBERNATION
	if (hibernate_rebuild_needed ||
	    hibernate_rebuild_hash_list) {
		panic("BUCKET_CHECK: hibernation in progress: "
		      "rebuild_needed=%d rebuild_hash_list=%p\n",
		      hibernate_rebuild_needed,
		      hibernate_rebuild_hash_list);
	}
#endif /* HIBERNATION */

#if VM_PAGE_FAKE_BUCKETS
	char *cp;
	for (cp = (char *) vm_page_fake_buckets_start;
	     cp < (char *) vm_page_fake_buckets_end;
	     cp++) {
		if (*cp != 0x5a) {
			panic("BUCKET_CHECK: corruption at %p in fake buckets "
			      "[0x%llx:0x%llx]\n",
			      cp,
			      (uint64_t) vm_page_fake_buckets_start,
			      (uint64_t) vm_page_fake_buckets_end);
		}
	}
#endif /* VM_PAGE_FAKE_BUCKETS */

	for (i = 0; i < vm_page_bucket_count; i++) {
		bucket = &vm_page_buckets[i];
		if (!bucket->page_list) {
			continue;
		}

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);
		p = VM_PAGE_UNPACK_PTR(bucket->page_list);
		while (p != VM_PAGE_NULL) {
			if (!p->hashed) {
				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
				      "hash %d in bucket %d at %p "
				      "is not hashed\n",
				      p, p->object, p->offset,
				      p_hash, i, bucket);
			}
			p_hash = vm_page_hash(p->object, p->offset);
			if (p_hash != i) {
				panic("BUCKET_CHECK: corruption in bucket %d "
				      "at %p: page %p object %p offset 0x%llx "
				      "hash %d\n",
				      i, bucket, p, p->object, p->offset,
				      p_hash);
			}
			p = VM_PAGE_UNPACK_PTR(p->next_m);
		}
		lck_spin_unlock(bucket_lock);
	}

//	printf("BUCKET_CHECK: checked buckets\n");
}
#endif /* VM_PAGE_BUCKETS_CHECK */