/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t	page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
    vm_page_packed_t	page_list;
#if	MACH_PAGE_HASH_STATS
    int			cur_count;	/* current count */
    int			hi_count;	/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;		/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
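
/*
 * Illustrative sketch (not part of the original source): each spin lock in
 * vm_page_bucket_locks[] covers BUCKETS_PER_LOCK consecutive buckets, so a
 * bucket's lock is found by dividing the bucket index by BUCKETS_PER_LOCK.
 * The helper name below is hypothetical; the real code open-codes this
 * expression at each use site.
 */
#if 0	/* example only */
static lck_spin_t *
example_bucket_lock_for(int hash_id)
{
    return &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
}
#endif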
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
    int	i;
    int	numbuckets = 0;
    int	highsum = 0;
    int	maxdepth = 0;

    for (i = 0; i < vm_page_bucket_count; i++) {
	if (vm_page_buckets[i].hi_count) {
	    numbuckets++;
	    highsum += vm_page_buckets[i].hi_count;
	    if (vm_page_buckets[i].hi_count > maxdepth)
		maxdepth = vm_page_buckets[i].hi_count;
	}
    }
    printf("Total number of buckets: %d\n", vm_page_bucket_count);
    printf("Number used buckets:     %d = %d%%\n",
	    numbuckets, 100*numbuckets/vm_page_bucket_count);
    printf("Number unused buckets:   %d = %d%%\n",
	    vm_page_bucket_count - numbuckets,
	    100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
    printf("Sum of bucket max depth: %d\n", highsum);
    printf("Average bucket depth:    %d.%2d\n",
	    highsum/vm_page_bucket_count,
	    highsum%vm_page_bucket_count);
    printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	functions.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define	VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0;	/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;	/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif
queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;


/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
    page_size  = PAGE_SIZE;
    page_mask  = PAGE_MASK;
    page_shift = PAGE_SHIFT;

    if ((page_mask & page_size) != 0)
	panic("vm_set_page_size: page size not a power of two");

    for (page_shift = 0; ; page_shift++)
	if ((1U << page_shift) == page_size)
	    break;
}
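
/*
 * Illustrative check (not part of the original source): with the usual 4KB
 * pages, page_size = 4096 and page_mask = 4095, so (page_mask & page_size)
 * is zero and the loop above terminates with page_shift == 12.
 */
#if 0	/* example only */
static void
example_page_size_sanity(void)
{
    assert((page_mask & page_size) == 0);		/* power of two */
    assert(((vm_size_t)1 << page_shift) == page_size);
}
#endif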
#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
    unsigned int	n, override;

    if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
	n = override;
    else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
	n = vm_cache_geometry_colors;
    else	n = DEFAULT_COLORS;			/* use default if all else fails */

    if ( n > MAX_COLORS )
	n = MAX_COLORS;

    /* the count must be a power of 2  */
    if ( ( n & (n - 1)) != 0 )
	panic("vm_page_set_colors");

    vm_colors = n;
    vm_color_mask = n - 1;

    vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}
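
/*
 * Illustrative sketch (not part of the original source): because the color
 * count chosen above is forced to a power of two, stepping to the next
 * free-queue color can use vm_color_mask instead of a modulo; this is the
 * pattern vm_page_grab() uses when it steals pages from vm_page_queue_free[].
 * The helper name below is hypothetical.
 */
#if 0	/* example only */
static unsigned int
example_next_color(unsigned int color)
{
    return (color + 1) & vm_color_mask;	/* wraps from vm_colors - 1 back to 0 */
}
#endif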
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
    /*
     * initialize the vm_page lock world
     */
    lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
    lck_attr_setdefault(&vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

    vm_compressor_init_locks();
}
void
vm_page_init_local_q()
{
    unsigned int	num_cpus;
    unsigned int	i;
    struct vplq		*t_local_q;

    num_cpus = ml_get_max_cpus();

    /*
     * no point in this for a uni-processor system
     */
    if (num_cpus >= 2) {
	t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

	for (i = 0; i < num_cpus; i++) {
	    struct vpl	*lq;

	    lq = &t_local_q[i].vpl_un.vpl;
	    VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
	    queue_init(&lq->vpl_queue);
	    lq->vpl_count = 0;
	    lq->vpl_internal_count = 0;
	    lq->vpl_external_count = 0;
	}
	vm_page_local_q_count = num_cpus;

	vm_page_local_q = (struct vplq *)t_local_q;
    }
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
    vm_offset_t		*startp,
    vm_offset_t		*endp)
{
    register vm_page_t	m;
    unsigned int	i;
    unsigned int	log1;
    unsigned int	log2;
    unsigned int	size;

    /*
     *	Initialize the vm_page template.
     */

    m = &vm_page_template;
    bzero(m, sizeof (*m));

    m->pageq.next = NULL;
    m->pageq.prev = NULL;
    m->listq.next = NULL;
    m->listq.prev = NULL;
    m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

    m->object = VM_OBJECT_NULL;			/* reset later */
    m->offset = (vm_object_offset_t) -1;	/* reset later */

    m->pageout_queue = FALSE;
    m->speculative = FALSE;
    m->reference = FALSE;
    m->throttled = FALSE;
    m->__unused_pageq_bits = 0;

    m->phys_page = 0;			/* reset later */

    m->fictitious = FALSE;
    m->clustered = FALSE;
    m->overwriting = FALSE;
    m->encrypted = FALSE;
    m->encrypted_cleaning = FALSE;
    m->cs_validated = FALSE;
    m->cs_tainted = FALSE;
    m->compressor = FALSE;
    m->written_by_kernel = FALSE;
    m->__unused_object_bits = 0;

    /*
     *	Initialize the page queues.
     */
    vm_page_init_lck_grp();

    lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

    for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
	int group;

	purgeable_queues[i].token_q_head = 0;
	purgeable_queues[i].token_q_tail = 0;
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	    queue_init(&purgeable_queues[i].objq[group]);

	purgeable_queues[i].type = i;
	purgeable_queues[i].new_pages = 0;
	purgeable_queues[i].debug_count_tokens = 0;
	purgeable_queues[i].debug_count_objects = 0;
    }
    purgeable_nonvolatile_count = 0;
    queue_init(&purgeable_nonvolatile_queue);

    for (i = 0; i < MAX_COLORS; i++ )
	queue_init(&vm_page_queue_free[i]);

    queue_init(&vm_lopage_queue_free);
    queue_init(&vm_page_queue_active);
    queue_init(&vm_page_queue_inactive);
    queue_init(&vm_page_queue_cleaned);
    queue_init(&vm_page_queue_throttled);
    queue_init(&vm_page_queue_anonymous);

    for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
	queue_init(&vm_page_queue_speculative[i].age_q);

	vm_page_queue_speculative[i].age_ts.tv_sec = 0;
	vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
    }
    vm_page_free_wanted = 0;
    vm_page_free_wanted_privileged = 0;

    vm_page_set_colors();

    /*
     *	Steal memory for the map and zone subsystems.
     */
    kernel_debug_string("zone_steal_memory");
    zone_steal_memory();
    kernel_debug_string("vm_map_steal_memory");
    vm_map_steal_memory();

    /*
     *	Allocate (and initialize) the virtual-to-physical
     *	table hash buckets.
     *
     *	The number of buckets should be a power of two to
     *	get a good hash function.  The following computation
     *	chooses the first power of two that is greater
     *	than the number of physical pages in the system.
     */

    if (vm_page_bucket_count == 0) {
	unsigned int npages = pmap_free_pages();

	vm_page_bucket_count = 1;
	while (vm_page_bucket_count < npages)
	    vm_page_bucket_count <<= 1;
    }
    vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

    vm_page_hash_mask = vm_page_bucket_count - 1;

    /*
     *	Calculate object shift value for hashing algorithm:
     *		O = log2(sizeof(struct vm_object))
     *		B = log2(vm_page_bucket_count)
     *		hash shifts the object left by
     *		B/2 - O
     */
    size = vm_page_bucket_count;
    for (log1 = 0; size > 1; log1++)
	size /= 2;
    size = sizeof(struct vm_object);
    for (log2 = 0; size > 1; log2++)
	size /= 2;
    vm_page_hash_shift = log1/2 - log2 + 1;

    vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
    vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
    vm_page_bucket_hash |= 1;				/* Set bit and add 1 - always must be 1 to ensure unique series */

    if (vm_page_hash_mask & vm_page_bucket_count)
	printf("vm_page_bootstrap: WARNING -- strange page hash\n");

#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
    /*
     * Allocate a decoy set of page buckets, to detect
     * any stomping there.
     */
    vm_page_fake_buckets = (vm_page_bucket_t *)
	pmap_steal_memory(vm_page_bucket_count *
			  sizeof(vm_page_bucket_t));
    vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
    vm_page_fake_buckets_end =
	vm_map_round_page((vm_page_fake_buckets_start +
			   (vm_page_bucket_count *
			    sizeof (vm_page_bucket_t))),
			  PAGE_MASK);
    char *cp;
    for (cp = (char *)vm_page_fake_buckets_start;
	 cp < (char *)vm_page_fake_buckets_end;
	 cp++) {
	*cp = 0x5a;
    }
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

    kernel_debug_string("vm_page_buckets");
    vm_page_buckets = (vm_page_bucket_t *)
	pmap_steal_memory(vm_page_bucket_count *
			  sizeof(vm_page_bucket_t));

    kernel_debug_string("vm_page_bucket_locks");
    vm_page_bucket_locks = (lck_spin_t *)
	pmap_steal_memory(vm_page_bucket_lock_count *
			  sizeof(lck_spin_t));

    for (i = 0; i < vm_page_bucket_count; i++) {
	register vm_page_bucket_t *bucket = &vm_page_buckets[i];

	bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if	MACH_PAGE_HASH_STATS
	bucket->cur_count = 0;
	bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
    }

    for (i = 0; i < vm_page_bucket_lock_count; i++)
	lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
    vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */

    /*
     *	Machine-dependent code allocates the resident page table.
     *	It uses vm_page_init to initialize the page frames.
     *	The code also returns to us the virtual space available
     *	to the kernel.  We don't trust the pmap module
     *	to get the alignment right.
     */

    kernel_debug_string("pmap_startup");
    pmap_startup(&virtual_space_start, &virtual_space_end);
    virtual_space_start = round_page(virtual_space_start);
    virtual_space_end = trunc_page(virtual_space_end);

    *startp = virtual_space_start;
    *endp = virtual_space_end;

    /*
     *	Compute the initial "wire" count.
     *	Up until now, the pages which have been set aside are not under
     *	the VM system's control, so although they aren't explicitly
     *	wired, they nonetheless can't be moved. At this moment,
     *	all VM managed pages are "free", courtesy of pmap_startup.
     */
    assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
    vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
    vm_page_wire_count_initial = vm_page_wire_count;

    printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	   vm_page_free_count, vm_page_wire_count);

    kernel_debug_string("vm_page_bootstrap complete");
    simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
    vm_size_t size)
{
    vm_offset_t	addr, vaddr;
    ppnum_t	phys_page;

    /*
     *	We round the size to a round multiple.
     */

    size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

    /*
     *	If this is the first call to pmap_steal_memory,
     *	we have to initialize ourself.
     */

    if (virtual_space_start == virtual_space_end) {
	pmap_virtual_space(&virtual_space_start, &virtual_space_end);

	/*
	 *	The initial values must be aligned properly, and
	 *	we don't trust the pmap module to do it right.
	 */

	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);
    }

    /*
     *	Allocate virtual memory for this request.
     */

    addr = virtual_space_start;
    virtual_space_start += size;

    //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

    /*
     *	Allocate and map physical pages to back new virtual pages.
     */

    for (vaddr = round_page(addr);
	 vaddr < addr + size;
	 vaddr += PAGE_SIZE) {

	if (!pmap_next_page_hi(&phys_page))
	    panic("pmap_steal_memory");

	/*
	 *	XXX Logically, these mappings should be wired,
	 *	but some pmap modules barf if they are.
	 */
#if defined(__LP64__)
	pmap_pre_expand(kernel_pmap, vaddr);
#endif

	pmap_enter(kernel_pmap, vaddr, phys_page,
		   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
		   VM_WIMG_USE_DEFAULT, FALSE);
	/*
	 * Account for newly stolen memory
	 */
	vm_page_wire_count++;
    }

    return (void *) addr;
}

void vm_page_release_startup(vm_page_t mem);
void
pmap_startup(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    unsigned int i, npages, pages_initialized, fill, fillval;
    ppnum_t	phys_page;
    addr64_t	tmpaddr;

#if defined(__LP64__)
    /*
     * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
     */
    assert(sizeof(struct vm_page) == 64);

    /*
     * make sure we are aligned on a 64 byte boundary
     * for VM_PAGE_PACK_PTR (it clips off the low-order
     * 6 bits of the pointer)
     */
    if (virtual_space_start != virtual_space_end)
	virtual_space_start = round_page(virtual_space_start);
#endif

    /*
     *	We calculate how many page frames we will have
     *	and then allocate the page structures in one chunk.
     */

    tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
    tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
    npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

    vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

    /*
     *	Initialize the page frames.
     */
    kernel_debug_string("Initialize the page frames");
    for (i = 0, pages_initialized = 0; i < npages; i++) {
	if (!pmap_next_page(&phys_page))
	    break;
	if (pages_initialized == 0 || phys_page < vm_page_lowest)
	    vm_page_lowest = phys_page;

	vm_page_init(&vm_pages[i], phys_page, FALSE);
	vm_page_pages++;
	pages_initialized++;
    }
    vm_pages_count = pages_initialized;

#if defined(__LP64__)

    if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
	panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

    if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
	panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
    kernel_debug_string("page fill/release");
    /*
     * Check if we want to initialize pages to a known value
     */
    fill = 0;									/* Assume no fill */
    if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if DEBUG
    /* This slows down booting the DEBUG kernel, particularly on
     * large memory systems, but is worthwhile in deterministically
     * trapping uninitialized memory usage.
     */
    if (fill == 0) {
	fill = 1;
	fillval = 0xDEB8F177;
    }
#endif
    if (fill)
	kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
    // -debug code remove
    if (2 == vm_himemory_mode) {
	// free low -> high so high is preferred
	for (i = 1; i <= pages_initialized; i++) {
	    if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
	    vm_page_release_startup(&vm_pages[i - 1]);
	}
    }
    else
    // debug code remove-

    /*
     * Release pages in reverse order so that physical pages
     * initially get allocated in ascending addresses. This keeps
     * the devices (which must address physical memory) happy if
     * they require several consecutive pages.
     */
    for (i = pages_initialized; i > 0; i--) {
	if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
	vm_page_release_startup(&vm_pages[i - 1]);
    }

    VM_CHECK_MEMORYSTATUS;
#if 0
    {
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;						/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
	    queue_iterate(&vm_page_queue_free[i],
			  xx,
			  vm_page_t,
			  pageq) {			/* BRINGUP */
		j++;					/* (BRINGUP) */
		if(j > vm_page_free_count) {		/* (BRINGUP) */
		    panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
		}

		l = vm_page_free_count - j;		/* (BRINGUP) */
		k = 0;					/* (BRINGUP) */

		if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

		for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
		    k++;
		    if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
		    if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
			panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
		    }
		}

		xxl = xx;
	    }
	}

	if(j != vm_page_free_count) {			/* (BRINGUP) */
	    panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
    }
#endif

    /*
     *	We have to re-align virtual_space_start,
     *	because pmap_steal_memory has been using it.
     */

    virtual_space_start = round_page(virtual_space_start);

    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
    vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			 0, PAGE_SIZE, "vm pages");

#if ZONE_DEBUG
    zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

    zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
    zone_change(vm_page_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_zone, Z_FOREIGN, TRUE);
    zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
    /*
     * Adjust zone statistics to account for the real pages allocated
     * in vm_page_create(). [Q: is this really what we want?]
     */
    vm_page_zone->count += vm_page_pages;
    vm_page_zone->sum_count += vm_page_pages;
    vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
    ppnum_t start,
    ppnum_t end)
{
    ppnum_t	phys_page;
    vm_page_t	m;

    for (phys_page = start;
	 phys_page < end;
	 phys_page++) {
	while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
		== VM_PAGE_NULL)
	    vm_page_more_fictitious();

	m->fictitious = FALSE;
	pmap_clear_noencrypt(phys_page);

	vm_page_pages++;
	vm_page_release(m);
    }
}
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
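
/*
 * Illustrative sketch (not part of the original source): vm_page_hash()
 * mixes the object pointer and the page-aligned offset, and the result is
 * already masked with vm_page_hash_mask, so it can index vm_page_buckets[]
 * directly.  The helper name below is hypothetical.
 */
#if 0	/* example only */
static vm_page_bucket_t *
example_bucket_for(vm_object_t object, vm_object_offset_t offset)
{
    int hash_id = vm_page_hash(object, offset);

    return &vm_page_buckets[hash_id];
}
#endif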
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
    vm_page_t		mem,
    vm_object_t		object,
    vm_object_offset_t	offset)
{
    vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}

void
vm_page_insert_internal(
    vm_page_t		mem,
    vm_object_t		object,
    vm_object_offset_t	offset,
    boolean_t		queues_lock_held,
    boolean_t		insert_in_hash,
    boolean_t		batch_pmap_op)
{
    vm_page_bucket_t	*bucket;
    lck_spin_t		*bucket_lock;
    int			hash_id;
    task_t		owner;

    XPR(XPR_VM_PAGE,
	"vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	object, offset, mem, 0,0);
#if 0
    /*
     * we may not hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif

    assert(page_aligned(offset));

    /* the vm_submap_object is only a placeholder for submaps */
    assert(object != vm_submap_object);

    vm_object_lock_assert_exclusive(object);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock,
		   queues_lock_held ? LCK_MTX_ASSERT_OWNED
				    : LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

    if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
	    panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
		  "already in (obj=%p,off=0x%llx)",
		  mem, object, offset, mem->object, mem->offset);
#endif
	assert(!object->internal || offset < object->vo_size);

	/* only insert "pageout" pages into "pageout" objects,
	 * and normal pages into normal objects */
	assert(object->pageout == mem->pageout);

	assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if MACH_PAGE_HASH_STATS
	if (++bucket->cur_count > bucket->hi_count)
	    bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
	mem->hashed = TRUE;
	lck_spin_unlock(bucket_lock);
    }

    {
	unsigned int	cache_attr;

	cache_attr = object->wimg_bits & VM_WIMG_MASK;

	if (cache_attr != VM_WIMG_USE_DEFAULT) {
	    PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
	}
    }
    /*
     *	Now link into the object's list of backed pages.
     */
    VM_PAGE_INSERT(mem, object);
    mem->tabled = TRUE;

    /*
     *	Show that the object has one more resident page.
     */

    object->resident_page_count++;
    if (VM_PAGE_WIRED(mem)) {
	object->wired_page_count++;
    }
    assert(object->resident_page_count >= object->wired_page_count);

    if (object->internal) {
	OSAddAtomic(1, &vm_page_internal_count);
    } else {
	OSAddAtomic(1, &vm_page_external_count);
    }

    /*
     * It wouldn't make sense to insert a "reusable" page in
     * an object (the page would have been marked "reusable" only
     * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
     * in the object at that time).
     * But a page could be inserted in a "all_reusable" object, if
     * something faults it in (a vm_read() from another task or a
     * "use-after-free" issue in user space, for example). It can
     * also happen if we're relocating a page from that object to
     * a different physical page during a physically-contiguous
     * allocation.
     */
    assert(!mem->reusable);
    if (mem->object->all_reusable) {
	OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
    }

    if (object->purgable == VM_PURGABLE_DENY) {
	owner = TASK_NULL;
    } else {
	owner = object->vo_purgeable_owner;
    }
    if (owner &&
	(object->purgable == VM_PURGABLE_NONVOLATILE ||
	 VM_PAGE_WIRED(mem))) {
	/* more non-volatile bytes */
	ledger_credit(owner->ledger,
		      task_ledgers.purgeable_nonvolatile,
		      PAGE_SIZE);
	/* more footprint */
	ledger_credit(owner->ledger,
		      task_ledgers.phys_footprint,
		      PAGE_SIZE);

    } else if (owner &&
	       (object->purgable == VM_PURGABLE_VOLATILE ||
		object->purgable == VM_PURGABLE_EMPTY)) {
	assert(! VM_PAGE_WIRED(mem));
	/* more volatile bytes */
	ledger_credit(owner->ledger,
		      task_ledgers.purgeable_volatile,
		      PAGE_SIZE);
    }

    if (object->purgable == VM_PURGABLE_VOLATILE) {
	if (VM_PAGE_WIRED(mem)) {
	    OSAddAtomic(+1, &vm_page_purgeable_wired_count);
	} else {
	    OSAddAtomic(+1, &vm_page_purgeable_count);
	}
    } else if (object->purgable == VM_PURGABLE_EMPTY &&
	       mem->throttled) {
	/*
	 * This page belongs to a purged VM object but hasn't
	 * been purged (because it was "busy").
	 * It's in the "throttled" queue and hence not
	 * visible to vm_pageout_scan().  Move it to a pageable
	 * queue, so that it can eventually be reclaimed, instead
	 * of lingering in the "empty" object.
	 */
	if (queues_lock_held == FALSE)
	    vm_page_lockspin_queues();
	vm_page_deactivate(mem);
	if (queues_lock_held == FALSE)
	    vm_page_unlock_queues();
    }

#if VM_OBJECT_TRACKING_OP_MODIFIED
    if (vm_object_tracking_inited &&
	object->internal &&
	object->resident_page_count == 0 &&
	object->pager == NULL &&
	object->shadow != NULL &&
	object->shadow->copy == object) {
	void *bt[VM_OBJECT_TRACKING_BTDEPTH];
	int numsaved = 0;

	numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
	btlog_add_entry(vm_object_tracking_btlog,
			object,
			VM_OBJECT_TRACKING_OP_MODIFIED,
			bt,
			numsaved);
    }
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
    register vm_page_t		mem,
    register vm_object_t	object,
    register vm_object_offset_t	offset)
{
    vm_page_bucket_t	*bucket;
    vm_page_t		found_m = VM_PAGE_NULL;
    lck_spin_t		*bucket_lock;
    int			hash_id;

#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
    if (mem->tabled || mem->object != VM_OBJECT_NULL)
	panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
	      "already in (obj=%p,off=0x%llx)",
	      mem, object, offset, mem->object, mem->offset);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
    /*
     *	Record the object/offset pair in this page
     */

    mem->object = object;
    mem->offset = offset;

    /*
     *	Insert it into the object_object/offset hash table,
     *	replacing any page that might have been there.
     */

    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    if (bucket->page_list) {
	vm_page_packed_t *mp = &bucket->page_list;
	vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

	do {
	    if (m->object == object && m->offset == offset) {
		/*
		 * Remove old page from hash list
		 */
		*mp = m->next_m;
		m->hashed = FALSE;

		found_m = m;
		break;
	    }
	    mp = &m->next_m;
	} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

	mem->next_m = bucket->page_list;
    } else {
	mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
    }
    /*
     * insert new page at head of hash list
     */
    bucket->page_list = VM_PAGE_PACK_PTR(mem);
    mem->hashed = TRUE;

    lck_spin_unlock(bucket_lock);

    if (found_m) {
	/*
	 * there was already a page at the specified
	 * offset for this object... remove it from
	 * the object and free it back to the free list
	 */
	vm_page_free_unlocked(found_m, FALSE);
    }
    vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
    vm_page_t	mem,
    boolean_t	remove_from_hash)
{
    vm_page_bucket_t	*bucket;
    vm_page_t		this;
    lck_spin_t		*bucket_lock;
    int			hash_id;
    task_t		owner;

    XPR(XPR_VM_PAGE,
	"vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	mem->object, mem->offset,
	mem, 0,0);

    vm_object_lock_assert_exclusive(mem->object);
    assert(mem->tabled);
    assert(!mem->cleaning);
    assert(!mem->laundry);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    if (remove_from_hash == TRUE) {
	/*
	 *	Remove from the object_object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
	    /* optimize for common case */

	    bucket->page_list = mem->next_m;
	} else {
	    vm_page_packed_t	*prev;

	    for (prev = &this->next_m;
		 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
		 prev = &this->next_m)
		continue;
	    *prev = this->next_m;
	}
#if MACH_PAGE_HASH_STATS
	bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
	mem->hashed = FALSE;
	lck_spin_unlock(bucket_lock);
    }
    /*
     *	Now remove from the object's list of backed pages.
     */

    VM_PAGE_REMOVE(mem);

    /*
     *	And show that the object has one fewer resident
     *	page.
     */

    assert(mem->object->resident_page_count > 0);
    mem->object->resident_page_count--;

    if (mem->object->internal) {
	assert(vm_page_internal_count);

	OSAddAtomic(-1, &vm_page_internal_count);
    } else {
	assert(vm_page_external_count);
	OSAddAtomic(-1, &vm_page_external_count);

	if (mem->xpmapped) {
	    assert(vm_page_xpmapped_external_count);
	    OSAddAtomic(-1, &vm_page_xpmapped_external_count);
	}
    }
    if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
	if (mem->object->resident_page_count == 0)
	    vm_object_cache_remove(mem->object);
    }

    if (VM_PAGE_WIRED(mem)) {
	assert(mem->object->wired_page_count > 0);
	mem->object->wired_page_count--;
    }
    assert(mem->object->resident_page_count >=
	   mem->object->wired_page_count);
    if (mem->reusable) {
	assert(mem->object->reusable_page_count > 0);
	mem->object->reusable_page_count--;
	assert(mem->object->reusable_page_count <=
	       mem->object->resident_page_count);
	mem->reusable = FALSE;
	OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
	vm_page_stats_reusable.reused_remove++;
    } else if (mem->object->all_reusable) {
	OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
	vm_page_stats_reusable.reused_remove++;
    }

    if (mem->object->purgable == VM_PURGABLE_DENY) {
	owner = TASK_NULL;
    } else {
	owner = mem->object->vo_purgeable_owner;
    }
    if (owner &&
	(mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	 VM_PAGE_WIRED(mem))) {
	/* less non-volatile bytes */
	ledger_debit(owner->ledger,
		     task_ledgers.purgeable_nonvolatile,
		     PAGE_SIZE);
	/* less footprint */
	ledger_debit(owner->ledger,
		     task_ledgers.phys_footprint,
		     PAGE_SIZE);
    } else if (owner &&
	       (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		mem->object->purgable == VM_PURGABLE_EMPTY)) {
	assert(! VM_PAGE_WIRED(mem));
	/* less volatile bytes */
	ledger_debit(owner->ledger,
		     task_ledgers.purgeable_volatile,
		     PAGE_SIZE);
    }
    if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
	if (VM_PAGE_WIRED(mem)) {
	    assert(vm_page_purgeable_wired_count > 0);
	    OSAddAtomic(-1, &vm_page_purgeable_wired_count);
	} else {
	    assert(vm_page_purgeable_count > 0);
	    OSAddAtomic(-1, &vm_page_purgeable_count);
	}
    }
    if (mem->object->set_cache_attr == TRUE)
	pmap_set_cache_attributes(mem->phys_page, 0);

    mem->tabled = FALSE;
    mem->object = VM_OBJECT_NULL;
    mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
    vm_object_t		object,
    vm_object_offset_t	offset)
{
    vm_page_t		mem;
    vm_page_bucket_t	*bucket;
    queue_entry_t	qe;
    lck_spin_t		*bucket_lock;
    int			hash_id;

    vm_object_lock_assert_held(object);
    mem = object->memq_hint;

    if (mem != VM_PAGE_NULL) {
	assert(mem->object == object);

	if (mem->offset == offset) {
	    vm_page_lookup_hint++;
	    return mem;
	}
	qe = queue_next(&mem->listq);

	if (! queue_end(&object->memq, qe)) {
	    vm_page_t	next_page;

	    next_page = (vm_page_t) qe;
	    assert(next_page->object == object);

	    if (next_page->offset == offset) {
		vm_page_lookup_hint_next++;
		object->memq_hint = next_page;	/* new hint */
		return next_page;
	    }
	}
	qe = queue_prev(&mem->listq);

	if (! queue_end(&object->memq, qe)) {
	    vm_page_t	prev_page;

	    prev_page = (vm_page_t) qe;
	    assert(prev_page->object == object);

	    if (prev_page->offset == offset) {
		vm_page_lookup_hint_prev++;
		object->memq_hint = prev_page;	/* new hint */
		return prev_page;
	    }
	}
    }
    /*
     * Search the hash table for this object/offset pair
     */
    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];

    /*
     * since we hold the object lock, we are guaranteed that no
     * new pages can be inserted into this object... this in turn
     * guarantees that the page we're looking for can't exist
     * if the bucket it hashes to is currently NULL even when looked
     * at outside the scope of the hash bucket lock... this is a
     * really cheap optimization to avoid taking the lock
     */
    if (!bucket->page_list) {
	vm_page_lookup_bucket_NULL++;

	return (VM_PAGE_NULL);
    }
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if ((mem->object == object) && (mem->offset == offset))
	    break;
    }
    lck_spin_unlock(bucket_lock);

    if (mem != VM_PAGE_NULL) {
	if (object->memq_hint != VM_PAGE_NULL) {
	    vm_page_lookup_hint_miss++;
	}
	assert(mem->object == object);
	object->memq_hint = mem;
    } else {
	vm_page_lookup_miss++;
    }

    return(mem);
}
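
/*
 * Illustrative usage sketch (not part of the original source): callers must
 * hold the object lock across the lookup, since both the memq_hint fast path
 * and the "NULL bucket" shortcut above are only safe under that lock.  The
 * caller shown here is hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_page_is_resident(vm_object_t object, vm_object_offset_t offset)
{
    vm_page_t m;

    vm_object_lock(object);
    m = vm_page_lookup(object, offset);		/* VM_PAGE_NULL if absent */
    vm_object_unlock(object);

    return (m != VM_PAGE_NULL);
}
#endif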
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
    register vm_page_t		mem,
    register vm_object_t	new_object,
    vm_object_offset_t		new_offset,
    boolean_t			encrypted_ok)
{
    boolean_t	internal_to_external, external_to_internal;

    assert(mem->object != new_object);

    /*
     * The encryption key is based on the page's memory object
     * (aka "pager") and paging offset.  Moving the page to
     * another VM object changes its "pager" and "paging_offset"
     * so it has to be decrypted first, or we would lose the key.
     *
     * One exception is VM object collapsing, where we transfer pages
     * from one backing object to its parent object.  This operation also
     * transfers the paging information, so the <pager,paging_offset> info
     * should remain consistent.  The caller (vm_object_do_collapse())
     * sets "encrypted_ok" in this case.
     */
    if (!encrypted_ok && mem->encrypted) {
	panic("vm_page_rename: page %p is encrypted\n", mem);
    }

    XPR(XPR_VM_PAGE,
	"vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	new_object, new_offset,
	mem, 0,0);

    /*
     *	Changes to mem->object require the page lock because
     *	the pageout daemon uses that lock to get the object.
     */
    vm_page_lockspin_queues();

    internal_to_external = FALSE;
    external_to_internal = FALSE;

    if (mem->local) {
	/*
	 * it's much easier to get the vm_page_pageable_xxx accounting correct
	 * if we first move the page to the active queue... it's going to end
	 * up there anyway, and we don't do vm_page_rename's frequently enough
	 * for this to matter.
	 */
	VM_PAGE_QUEUES_REMOVE(mem);
	vm_page_activate(mem);
    }
    if (mem->active || mem->inactive || mem->speculative) {
	if (mem->object->internal && !new_object->internal) {
	    internal_to_external = TRUE;
	}
	if (!mem->object->internal && new_object->internal) {
	    external_to_internal = TRUE;
	}
    }

    vm_page_remove(mem, TRUE);
    vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

    if (internal_to_external) {
	vm_page_pageable_internal_count--;
	vm_page_pageable_external_count++;
    } else if (external_to_internal) {
	vm_page_pageable_external_count--;
	vm_page_pageable_internal_count++;
    }

    vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
    vm_page_t	mem,
    ppnum_t	phys_page,
    boolean_t	lopage)
{
    assert(phys_page);

#if DEBUG
    if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
	if (!(pmap_valid_page(phys_page))) {
	    panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
	}
    }
#endif
    *mem = vm_page_template;
    mem->phys_page = phys_page;
#if 0
    /*
     * we're leaving this turned off for now... currently pages
     * come off the free list and are either immediately dirtied/referenced
     * due to zero-fill or COW faults, or are used to read or write files...
     * in the file I/O case, the UPL mechanism takes care of clearing
     * the state of the HW ref/mod bits in a somewhat fragile way.
     * Since we may change the way this works in the future (to toughen it up),
     * I'm leaving this as a reminder of where these bits could get cleared
     */

    /*
     * make sure both the h/w referenced and modified bits are
     * clear at this point... we are especially dependent on
     * not finding a 'stale' h/w modified in a number of spots
     * once this page goes back into use
     */
    pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
    mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

vm_page_t
vm_page_grab_fictitious_common(
    ppnum_t phys_addr)
{
    vm_page_t	m;

    if ((m = (vm_page_t)zget(vm_page_zone))) {

	vm_page_init(m, phys_addr, FALSE);
	m->fictitious = TRUE;

	c_vm_page_grab_fictitious++;
    } else
	c_vm_page_grab_fictitious_failed++;

    return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
    return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
    return vm_page_grab_fictitious_common(vm_page_guard_addr);
}
/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
    vm_page_t m)
{
    assert(m->fictitious);
    assert(m->phys_page == vm_page_fictitious_addr ||
	   m->phys_page == vm_page_guard_addr);

    c_vm_page_release_fictitious++;

    zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
    vm_offset_t		addr;
    kern_return_t	retval;

    c_vm_page_more_fictitious++;

    /*
     * Allocate a single page from the zone_map. Do not wait if no physical
     * pages are immediately available, and do not zero the space. We need
     * our own blocking lock here to prevent having multiple,
     * simultaneous requests from piling up on the zone_map lock. Exactly
     * one (of our) threads should be potentially waiting on the map lock.
     * If winner is not vm-privileged, then the page allocation will fail,
     * and it will temporarily block here in the vm_page_wait().
     */
    lck_mtx_lock(&vm_page_alloc_lock);
    /*
     * If another thread allocated space, just bail out now.
     */
    if (zone_free_count(vm_page_zone) > 5) {
	/*
	 * The number "5" is a small number that is larger than the
	 * number of fictitious pages that any single caller will
	 * attempt to allocate. Otherwise, a thread will attempt to
	 * acquire a fictitious page (vm_page_grab_fictitious), fail,
	 * release all of the resources and locks already acquired,
	 * and then call this routine. This routine finds the pages
	 * that the caller released, so fails to allocate new space.
	 * The process repeats infinitely. The largest known number
	 * of fictitious pages required in this manner is 2. 5 is
	 * simply a somewhat larger number.
	 */
	lck_mtx_unlock(&vm_page_alloc_lock);
	return;
    }

    retval = kernel_memory_allocate(zone_map,
				    &addr, PAGE_SIZE, VM_PROT_ALL,
				    KMA_KOBJECT|KMA_NOPAGEWAIT);
    if (retval != KERN_SUCCESS) {
	/*
	 * No page was available. Drop the
	 * lock to give another thread a chance at it, and
	 * wait for the pageout daemon to make progress.
	 */
	lck_mtx_unlock(&vm_page_alloc_lock);
	vm_page_wait(THREAD_UNINT);
	return;
    }

    /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
    OSAddAtomic64(1, &(vm_page_zone->page_count));

    zcram(vm_page_zone, addr, PAGE_SIZE);

    lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
    /* No locking, at worst we will fib. */
    return( vm_page_free_count <= vm_page_free_reserved );
}
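
/*
 * Illustrative usage sketch (not part of the original source): vm_pool_low()
 * is unlocked and advisory, so a caller should treat a non-zero result only
 * as a hint to defer optional, allocation-heavy work.  The caller shown here
 * is hypothetical.
 */
#if 0	/* example only */
static void
example_optional_work(void)
{
    if (vm_pool_low()) {
	/* memory is likely tight; skip or defer the optional work */
	return;
    }
    /* ... proceed with the allocation-heavy path ... */
}
#endif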
/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;

vm_page_t
vm_page_grablo(void)
{
    vm_page_t	mem;

    if (vm_lopage_needed == FALSE)
	return (vm_page_grab());

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if ( !queue_empty(&vm_lopage_queue_free)) {
	queue_remove_first(&vm_lopage_queue_free,
			   mem,
			   vm_page_t,
			   pageq);
	assert(vm_lopage_free_count);

	vm_lopage_free_count--;
	vm_lopages_allocated_q++;

	if (vm_lopage_free_count < vm_lopage_lowater)
	    vm_lopage_refill = TRUE;

	lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

	    lck_mtx_lock_spin(&vm_page_queue_free_lock);
	    vm_lopages_allocated_cpm_failed++;
	    lck_mtx_unlock(&vm_page_queue_free_lock);

	    return (VM_PAGE_NULL);
	}
	vm_page_lockspin_queues();

	mem->gobbled = FALSE;
	vm_page_gobble_count--;
	vm_page_wire_count--;

	vm_lopages_allocated_cpm_success++;
	vm_page_unlock_queues();
    }
    assert(!mem->pmapped);
    assert(!mem->wpmapped);
    assert(!pmap_is_noencrypt(mem->phys_page));

    mem->pageq.next = NULL;
    mem->pageq.prev = NULL;

    return (mem);
}
2026 * first try to grab a page from the per-cpu free list...
2027 * this must be done while pre-emption is disabled... if
2028 * a page is available, we're done...
2029 * if no page is available, grab the vm_page_queue_free_lock
2030 * and see if current number of free pages would allow us
2031 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2032 * if there are pages available, disable preemption and
2033 * recheck the state of the per-cpu free list... we could
2034 * have been preempted and moved to a different cpu, or
2035 * some other thread could have re-filled it... if still
2036 * empty, figure out how many pages we can steal from the
2037 * global free queue and move to the per-cpu queue...
2038 * return 1 of these pages when done... only wakeup the
2039 * pageout_scan thread if we moved pages from the global
2040 * list... no need for the wakeup if we've satisfied the
2041 * request from the per-cpu queue.
2046 vm_page_grab( void )
2051 disable_preemption();
2053 if ((mem
= PROCESSOR_DATA(current_processor(), free_pages
))) {
2054 return_page_from_cpu_list
:
2055 PROCESSOR_DATA(current_processor(), page_grab_count
) += 1;
2056 PROCESSOR_DATA(current_processor(), free_pages
) = mem
->pageq
.next
;
2058 enable_preemption();
2059 mem
->pageq
.next
= NULL
;
2061 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
2062 assert(mem
->tabled
== FALSE
);
2063 assert(mem
->object
== VM_OBJECT_NULL
);
2064 assert(!mem
->laundry
);
2066 assert(pmap_verify_free(mem
->phys_page
));
2068 assert(!mem
->encrypted
);
2069 assert(!mem
->pmapped
);
2070 assert(!mem
->wpmapped
);
2071 assert(!mem
->active
);
2072 assert(!mem
->inactive
);
2073 assert(!mem
->throttled
);
2074 assert(!mem
->speculative
);
2075 assert(!pmap_is_noencrypt(mem
->phys_page
));
2079 enable_preemption();
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count
>= VM_PAGE_WIRE_COUNT_WARNING
) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count
);
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count
>= VM_PAGE_GOBBLE_COUNT_WARNING
) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count
);
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2104 if ((vm_page_free_count
< vm_page_free_reserved
) &&
2105 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock
);
2112 unsigned int pages_to_steal
;
		while (vm_page_free_count == 0) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else {
			if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
				pages_to_steal = vm_free_magazine_refill_limit;
			else
				pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
		}
		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		vm_page_free_count -= pages_to_steal;

		while (pages_to_steal--) {

			while (queue_empty(&vm_page_queue_free[color]))
				color = (color + 1) & vm_color_mask;

			queue_remove_first(&vm_page_queue_free[color],
					   mem,
					   vm_page_t,
					   pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			assert(!mem->active);
			assert(!mem->inactive);
			assert(!mem->throttled);
			assert(!mem->speculative);

			color = (color + 1) & vm_color_mask;

			if (head == NULL)
				head = mem;
			else
				tail->pageq.next = (queue_t)mem;
			tail = mem;

			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
			assert(mem->tabled == FALSE);
			assert(mem->object == VM_OBJECT_NULL);
			assert(!mem->laundry);
			assert(pmap_verify_free(mem->phys_page));
			assert(!mem->encrypted);
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
			assert(!pmap_is_noencrypt(mem->phys_page));
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);

		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		mem->pageq.next = NULL;

		enable_preemption();
	}
	/*
	 *	Decide if we should poke the pageout daemon.
	 *	We do this if the free count is less than the low
	 *	water mark, or if the free count is less than the high
	 *	water mark (but above the low water mark) and the inactive
	 *	count is less than its target.
	 *
	 *	We don't have the counts locked ... if they change a little,
	 *	it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}
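/*
 * Illustrative sketch (not part of the original file): a typical caller
 * loops on vm_page_grab() and blocks in vm_page_wait() until the pageout
 * machinery replenishes the free list.  The helper name below is
 * hypothetical; it only shows the common retry pattern.
 */
#if 0	/* example only */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t	mem;

	for (;;) {
		mem = vm_page_grab();
		if (mem != VM_PAGE_NULL)
			break;
		/* block uninterruptibly until a page becomes available */
		vm_page_wait(THREAD_UNINT);
	}
	return (mem);
}
#endif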
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(mem->phys_page);

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (mem->free)
		panic("vm_page_release");

	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem,
				  vm_page_t,
				  pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;
	} else {
		mem->lopage = FALSE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy thread runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */

		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
/*
 *	This version of vm_page_release() is used only at startup
 *	when we are single-threaded and pages are being released
 *	for the first time.  Hence, no locking or unnecessary checks are made.
 *	Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
	register vm_page_t	mem)
{
	queue_t	queue_free;

	if (vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		mem->lopage = TRUE;
		vm_lopage_free_count++;
		queue_free = &vm_lopage_queue_free;
	} else {
		mem->lopage = FALSE;
		vm_page_free_count++;
		queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
	}
	queue_enter_first(queue_free, mem, vm_page_t, pageq);
}
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible)
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING) {
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
				       vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
			wait_result = thread_block(THREAD_CONTINUE_NULL);
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
		}

		return (wait_result == THREAD_AWAKENED);
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	return TRUE;
}
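/*
 * Illustrative sketch (not part of the original file): an interruptible
 * caller must check the boolean result of vm_page_wait() -- FALSE means
 * the wait was interrupted and the attempt should be abandoned rather
 * than retried.  The helper name below is hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_grab_page_interruptible(vm_page_t *mem_p)
{
	vm_page_t	mem;

	while ((mem = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_ABORTSAFE))
			return (KERN_ABORTED);	/* interrupted: give up */
	}
	*mem_p = mem;
	return (KERN_SUCCESS);
}
#endif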
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}
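/*
 * Illustrative sketch (not part of the original file): vm_page_alloc()
 * must be called with the object locked exclusively, and the caller has
 * to cope with a NULL return, typically by dropping the lock, waiting,
 * and retrying.  The helper name is hypothetical.
 */
#if 0	/* example only */
static vm_page_t
example_alloc_at(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t	mem;

	vm_object_lock(object);
	while ((mem = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		/* no free pages: drop the lock before blocking */
		vm_object_unlock(object);
		vm_page_wait(THREAD_UNINT);
		vm_object_lock(object);
	}
	vm_object_unlock(object);
	return (mem);
}
#endif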
counter(unsigned int c_laundry_pages_freed = 0;)

/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}
void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");
#endif	/* MACH_ASSERT || DEBUG */
	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
			    mem->object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = mem->object->vo_purgeable_owner;
				/*
				 * While wired, this page was accounted
				 * as "non-volatile" but it should now
				 * be accounted as "volatile".
				 */
				/* one less "non-volatile"... */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_nonvolatile,
					     PAGE_SIZE);
				/* ... and "phys_footprint" */
				ledger_debit(owner->ledger,
					     task_ledgers.phys_footprint,
					     PAGE_SIZE);
				/* one more "volatile" */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_volatile,
					      PAGE_SIZE);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
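/*
 * Usage note (added commentary): vm_page_free() above assumes the caller
 * already holds the page queues lock, whereas vm_page_free_unlocked()
 * takes and drops the page queues lock itself.  In both variants the
 * caller is still expected to hold the page's object lock exclusively,
 * since the page is removed from its object as part of the free path.
 */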
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.
 */
void
vm_page_free_list(
	vm_page_t	freeq,
	boolean_t	prepare_object)
{
	vm_page_t	mem;
	vm_page_t	nxt;
	vm_page_t	local_freeq;
	int		pg_count;

	while (freeq) {

		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {

			assert(!mem->inactive);
			assert(!mem->active);
			assert(!mem->throttled);
			assert(!mem->speculative);
			assert(!VM_PAGE_WIRED(mem));
			assert(mem->pageq.prev == NULL);

			nxt = (vm_page_t)(mem->pageq.next);

			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
				assert(pmap_verify_free(mem->phys_page));
			}
			if (prepare_object == TRUE)
				vm_page_free_prepare_object(mem, TRUE);

			if (!mem->fictitious) {
				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    mem->phys_page < max_valid_low_ppnum) {
					mem->pageq.next = NULL;
					vm_page_release(mem);
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock).  That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->pageq.next = (queue_entry_t)local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(mem->phys_page);
				}
			} else {
				assert(mem->phys_page == vm_page_fictitious_addr ||
				       mem->phys_page == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ( (mem = local_freeq) ) {
			unsigned int	avail_free_count;
			unsigned int	need_wakeup = 0;
			unsigned int	need_priv_wakeup = 0;

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {
				int	color;

				nxt = (vm_page_t)(mem->pageq.next);

				color = mem->phys_page & vm_color_mask;
				queue_enter_first(&vm_page_queue_free[color],
						  mem,
						  vm_page_t,
						  pageq);
				mem = nxt;
			}
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
					avail_free_count -= need_priv_wakeup;
				}
			}
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int	available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
			}
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				thread_wakeup((event_t) &vm_page_free_count);
			} else for (; need_wakeup != 0; need_wakeup--) {
				/*
				 * Wake up one waiter per page we just released.
				 */
				thread_wakeup_one((event_t) &vm_page_free_count);
			}

			VM_CHECK_MEMORYSTATUS;
		}
	}
}
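/*
 * Illustrative sketch (not part of the original file): callers such as
 * vm_pageout_scan() chain pages through pageq.next and hand the whole
 * chain to vm_page_free_list(), so the free-list lock is taken per chunk
 * of pages rather than once per page.  The helper and its page source
 * below are hypothetical.
 */
#if 0	/* example only */
static void
example_free_batch(vm_page_t page_array[], int count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < count; i++) {
		vm_page_t m = page_array[i];

		/* push onto the local singly-linked chain */
		m->pageq.next = (queue_entry_t) local_freeq;
		m->pageq.prev = NULL;
		local_freeq = m;
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);	/* one pass for the whole batch */
}
#endif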
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if ( !VM_PAGE_WIRED(mem)) {

		if (mem->pageout_queue) {
			mem->pageout = FALSE;
			vm_pageout_throttle_up(mem);
		}
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
			    mem->object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = mem->object->vo_purgeable_owner;
				/* less volatile bytes */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_volatile,
					     PAGE_SIZE);
				/* more not-quite-volatile bytes */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_nonvolatile,
					      PAGE_SIZE);
				/* more footprint */
				ledger_credit(owner->ledger,
					      task_ledgers.phys_footprint,
					      PAGE_SIZE);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		VM_CHECK_MEMORYSTATUS;

		/*
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
}
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
		     mem->object->purgable == VM_PURGABLE_EMPTY) &&
		    mem->object->vo_purgeable_owner != TASK_NULL) {
			task_t	owner;

			owner = mem->object->vo_purgeable_owner;
			/* more volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_volatile,
				      PAGE_SIZE);
			/* less not-quite-volatile bytes */
			ledger_debit(owner->ledger,
				     task_ledgers.purgeable_nonvolatile,
				     PAGE_SIZE);
			/* less footprint */
			ledger_debit(owner->ledger,
				     task_ledgers.phys_footprint,
				     PAGE_SIZE);
		}
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
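/*
 * Illustrative sketch (not part of the original file): vm_page_wire() and
 * vm_page_unwire() are paired calls made with the page's object locked
 * exclusively and the page queues locked.  Passing TRUE as the second
 * argument to vm_page_unwire() requeues the page when its last wiring is
 * released.  The helper name below is hypothetical.
 */
#if 0	/* example only */
static void
example_wire_then_unwire(vm_page_t m)
{
	vm_object_lock(m->object);
	vm_page_lockspin_queues();
	vm_page_wire(m);		/* page leaves the paging queues */
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/* ... use the page while it cannot be paged out ... */

	vm_object_lock(m->object);
	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);	/* last unwire puts it back on a paging queue */
	vm_page_unlock_queues();
	vm_object_unlock(m->object);
}
#endif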
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}

void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
	    m->dirty && m->object->internal &&
	    (m->object->purgable == VM_PURGABLE_DENY ||
	     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     m->object->purgable == VM_PURGABLE_VOLATILE)) {
		queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
		m->throttled = TRUE;
		vm_page_throttled_count++;
	} else {
		if (m->object->named && m->object->ref_count == 1) {
			vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
			vm_page_speculative_recreated++;
#endif
		} else {
			VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
		}
	}
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->clean_queue = TRUE;
	vm_page_cleaned_count++;

	m->inactive = TRUE;
	vm_page_inactive_count++;
	if (m->object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}

	vm_pageout_enqueued_cleaned++;
}
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(
	register vm_page_t	m)
{
#ifdef	FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

#if DEBUG
	if (m->active)
		panic("vm_page_activate: already active");
#endif

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {

		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
			if (m->object->internal) {
				vm_page_pageable_internal_count++;
			} else {
				vm_page_pageable_external_count++;
			}
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
}
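/*
 * Illustrative sketch (not part of the original file): queue transitions
 * such as vm_page_activate() / vm_page_deactivate() are always made with
 * the page queues lock held.  The helper name and flag are hypothetical.
 */
#if 0	/* example only */
static void
example_touch_page(vm_page_t m, boolean_t keep_hot)
{
	vm_page_lock_queues();
	if (keep_hot)
		vm_page_activate(m);	/* put it on the active queue */
	else
		vm_page_deactivate(m);	/* make it a candidate for reclaim */
	vm_page_unlock_queues();
}
#endif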
/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;
		if (m->object->internal) {
			vm_page_pageable_internal_count++;
		} else {
			vm_page_pageable_external_count++;
		}

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
}
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 * The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			assert(m->throttled);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;
			if (m->object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->throttled = FALSE;
			m->active = TRUE;
		}

		/*
		 * Transfer the entire throttled queue to the regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

		printf("reactivated %d throttled pages\n", vm_page_throttled_count);

		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));
	vm_page_unlock_queues();
}
/*
 * move pages from the indicated local queue to the global active queue
 * it's ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->local = FALSE;
			m->active = TRUE;

			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to the regular LRU page queues.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		vm_page_pageable_internal_count += lq->vpl_internal_count;
		vm_page_pageable_external_count += lq->vpl_external_count;
		lq->vpl_count = 0;
		lq->vpl_internal_count = 0;
		lq->vpl_external_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
/*
 *	vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	vm_page_t	tmp;

	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if (m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if ((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				  m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp, m);
	VM_PAGE_FREE(tmp);
#endif

}
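/*
 * Illustrative sketch (not part of the original file): zeroing only the
 * tail of a page, e.g. after a short read filled the first `valid_len'
 * bytes.  The argument order (page, offset, length) follows the function
 * above; the helper and `valid_len' are hypothetical.
 */
#if 0	/* example only */
static void
example_zero_tail(vm_page_t m, vm_size_t valid_len)
{
	if (valid_len < PAGE_SIZE)
		vm_page_part_zero_fill(m, valid_len, PAGE_SIZE - valid_len);
}
#endif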
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    m->object, m->offset, m, 0, 0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(src_m->phys_page, src_pa,
			    dst_m->phys_page, dst_pa, len);
}
/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 *
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m->object, src_m->offset,
	    dest_m->object, dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m->object);

	/*
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;
		src_m->busy = TRUE;
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error;	/* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
static void
_vm_page_print(
	vm_page_t	p)
{
	printf("vm_page %p: \n", p);
	printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
	printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
	printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
	printf(" object=%p offset=0x%llx\n", p->object, p->offset);
	printf(" wire_count=%u\n", p->wire_count);

	printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
	       (p->local ? "" : "!"),
	       (p->inactive ? "" : "!"),
	       (p->active ? "" : "!"),
	       (p->pageout_queue ? "" : "!"),
	       (p->speculative ? "" : "!"),
	       (p->laundry ? "" : "!"));
	printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
	       (p->free ? "" : "!"),
	       (p->reference ? "" : "!"),
	       (p->gobbled ? "" : "!"),
	       (p->private ? "" : "!"),
	       (p->throttled ? "" : "!"));
	printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
	       (p->busy ? "" : "!"),
	       (p->wanted ? "" : "!"),
	       (p->tabled ? "" : "!"),
	       (p->fictitious ? "" : "!"),
	       (p->pmapped ? "" : "!"),
	       (p->wpmapped ? "" : "!"));
	printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
	       (p->pageout ? "" : "!"),
	       (p->absent ? "" : "!"),
	       (p->error ? "" : "!"),
	       (p->dirty ? "" : "!"),
	       (p->cleaning ? "" : "!"),
	       (p->precious ? "" : "!"),
	       (p->clustered ? "" : "!"));
	printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
	       (p->overwriting ? "" : "!"),
	       (p->restart ? "" : "!"),
	       (p->unusual ? "" : "!"),
	       (p->encrypted ? "" : "!"),
	       (p->encrypted_cleaning ? "" : "!"));
	printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
	       (p->cs_validated ? "" : "!"),
	       (p->cs_tainted ? "" : "!"),
	       (p->cs_nx ? "" : "!"),
	       (p->no_cache ? "" : "!"));

	printf("phys_page=0x%x\n", p->phys_page);
}
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static unsigned int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous: not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous: count error");
	}
	return page_count;
}
/*
 *	Check the free lists for proper length etc.
 */
static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	if (! vm_page_verify_this_free_list_enabled)
		return 0;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m,
		      vm_page_t,
		      pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( !m->busy )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
			if ( !m->free )
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
				      color, npages, m);
		}
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}

static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;
	boolean_t	toggle = TRUE;

	if (! vm_page_verify_all_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_verify_this_free_list_enabled == TRUE) {
		/*
		 * This variable has been set globally for extra checking of
		 * each free list Q. Since we didn't set it, we don't own it
		 * and we shouldn't toggle it.
		 */
		toggle = FALSE;
	}

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = TRUE;
	}

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists: "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = FALSE;
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
		assert(!mem->pageout_queue);
	}
}
#endif	/* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop which
 *	steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000
#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;

static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = VM_PAGE_NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum = 0;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
#if DEBUG
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
#endif
	int		stolen_pages = 0;
	int		compressed_pages = 0;

	if (contig_pages == 0)
		return VM_PAGE_NULL;

#if MACH_ASSERT
	vm_page_verify_free_lists();
#endif
#if DEBUG
	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
#endif
	PAGE_REPLACEMENT_ALLOWED(TRUE);

	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	RESET_STATE_OF_RUN();
	scanned = 0;
	considered = 0;

	free_available = vm_page_free_count - vm_page_free_reserved;

	wrapped = FALSE;

	if(flags & KMA_LOMEM)
		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
	else
		idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

	orig_last_idx = idx_last_contig_page_found;
	last_idx = orig_last_idx;

	for (page_idx = last_idx, start_idx = last_idx;
	     npages < contig_pages && page_idx < vm_pages_count;
	     page_idx++) {
retry:
		if (wrapped &&
		    npages == 0 &&
		    page_idx >= orig_last_idx) {
			/*
			 * We're back where we started and we haven't
			 * found any suitable contiguous range.  Let's
			 * give up.
			 */
			break;
		}
		scanned++;
		m = &vm_pages[page_idx];

		assert(!m->fictitious);
		assert(!m->private);

		if (max_pnum && m->phys_page > max_pnum) {
			/* no more low pages... */
			break;
		}
		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
			/*
			 * not aligned
			 */
			RESET_STATE_OF_RUN();

		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
			   m->encrypted_cleaning ||
			   m->pageout_queue || m->laundry || m->wanted ||
			   m->cleaning || m->overwriting || m->pageout) {
			/*
			 * page is in a transient state
			 * or a state we don't want to deal
			 * with, so don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
			/*
			 * page needs to be on one of our queues
			 * or it needs to belong to the compressor pool
			 * in order for it to be stable behind the
			 * locks we hold at this point...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && (!m->tabled || m->busy)) {
			/*
			 * pages on the free list are always 'busy'
			 * so we couldn't test for 'busy' in the check
			 * for the transient states... pages that are
			 * 'free' are never 'tabled', so we also couldn't
			 * test for 'tabled'.  So we check here to make
			 * sure that a non-free page is not busy and is
			 * tabled on an object...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else {
			if (m->phys_page != prevcontaddr + 1) {
				if ((m->phys_page & pnum_mask) != 0) {
					RESET_STATE_OF_RUN();
					goto did_consider;
				} else {
					npages = 1;
					start_idx = page_idx;
					start_pnum = m->phys_page;
				}
			} else {
				npages++;
			}
			prevcontaddr = m->phys_page;

			if (m->free) {
				free_considered++;
			} else {
				/*
				 * This page is not free.
				 * If we can't steal used pages,
				 * we have to give up this run
				 * and start over.
				 * Otherwise, we might need to
				 * move the contents of this page
				 * into a substitute page.
				 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
				if (m->pmapped || m->dirty || m->precious) {
					substitute_needed++;
				}
#else
				RESET_STATE_OF_RUN();
#endif
			}
		}
did_consider:
		if ((free_considered + substitute_needed) > free_available) {
			/*
			 * if we let this run continue
			 * we will end up dropping the vm_page_free_count
			 * below the reserve limit... we need to abort
			 * this run, but we can at least re-consider this
			 * page... thus the jump back to 'retry'
			 */
			RESET_STATE_OF_RUN();

			if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
				considered++;
				goto retry;
			}
			/*
			 * free_available == 0
			 * so can't consider any free pages... if
			 * we went to retry in this case, we'd
			 * get stuck looking at the same page
			 * w/o making any forward progress
			 * we also want to take this path if we've already
			 * reached our limit that controls the lock latency
			 */
		}
		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

			PAGE_REPLACEMENT_ALLOWED(FALSE);

			lck_mtx_unlock(&vm_page_queue_free_lock);
			vm_page_unlock_queues();

			mutex_pause(0);

			PAGE_REPLACEMENT_ALLOWED(TRUE);

			vm_page_lock_queues();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			RESET_STATE_OF_RUN();
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			considered = 0;
		} else
			considered++;
	}
	m = VM_PAGE_NULL;

	if (npages != contig_pages) {
		if (!wrapped) {
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();
			if( flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
			last_idx = 0;
			page_idx = last_idx;
			wrapped = TRUE;
			goto retry;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		vm_page_t	m1;
		vm_page_t	m2;
		unsigned int	cur_idx;
		unsigned int	tmp_start_idx;
		vm_object_t	locked_object = VM_OBJECT_NULL;
		boolean_t	abort_run = FALSE;

		assert(page_idx - start_idx == contig_pages);

		tmp_start_idx = start_idx;

		/*
		 * first pass through to pull the free pages
		 * off of the free queue so that in case we
		 * need substitute pages, we won't grab any
		 * of the free pages in the run... we'll clear
		 * the 'free' bit in the 2nd pass, and even in
		 * an abort_run case, we'll collect all of the
		 * free pages in this run and return them to the free list
		 */
		while (start_idx < page_idx) {

			m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
			assert(m1->free);
#endif

			if (m1->free) {
				unsigned int color;

				color = m1->phys_page & vm_color_mask;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
#endif
				queue_remove(&vm_page_queue_free[color],
					     m1,
					     vm_page_t,
					     pageq);
				m1->pageq.next = NULL;
				m1->pageq.prev = NULL;
#if MACH_ASSERT
				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
#endif
				/*
				 * Clear the "free" bit so that this page
				 * does not get considered for another
				 * concurrent physically-contiguous allocation.
				 */
				m1->free = FALSE;

				vm_page_free_count--;
			}
		}
		if( flags & KMA_LOMEM)
			vm_page_lomem_find_contiguous_last_idx = page_idx;
		else
			vm_page_find_contiguous_last_idx = page_idx;

		/*
		 * we can drop the free queue lock at this point since
		 * we've pulled any 'free' candidates off of the list
		 * we need it dropped so that we can do a vm_page_grab
		 * when substituting for pmapped/dirty pages
		 */
		lck_mtx_unlock(&vm_page_queue_free_lock);
		start_idx = tmp_start_idx;
		cur_idx = page_idx - 1;

		while (start_idx++ < page_idx) {
			/*
			 * must go through the list from back to front
			 * so that the page list is created in the
			 * correct order - low -> high phys addresses
			 */
			m1 = &vm_pages[cur_idx--];

			if (m1->object == VM_OBJECT_NULL) {
				/*
				 * page has already been removed from
				 * the free list in the 1st pass
				 */
				assert(m1->offset == (vm_object_offset_t) -1);
				assert(!m1->wanted);
				assert(!m1->laundry);
			} else {
				vm_object_t	object;
				int		refmod;
				boolean_t	disconnected, reusable;

				if (abort_run == TRUE)
					continue;

				object = m1->object;

				if (object != locked_object) {
					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					if (vm_object_lock_try(object))
						locked_object = object;
				}
				if (locked_object == VM_OBJECT_NULL ||
				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
				     m1->encrypted_cleaning ||
				     m1->pageout_queue || m1->laundry || m1->wanted ||
				     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {

					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					tmp_start_idx = cur_idx;
					abort_run = TRUE;
					continue;
				}

				disconnected = FALSE;
				reusable = FALSE;

				if ((m1->reusable ||
				     m1->object->all_reusable) &&
				    m1->inactive &&
				    !m1->dirty &&
				    !m1->reference) {
					/* reusable page... */
					refmod = pmap_disconnect(m1->phys_page);
					disconnected = TRUE;
					if (refmod == 0) {
						/*
						 * ... not reused: can steal
						 * without relocating contents.
						 */
						reusable = TRUE;
					}
				}

				if ((m1->pmapped && !reusable) ||
				    m1->dirty || m1->precious) {
					vm_object_offset_t offset;
					m2 = vm_page_grab();

					if (m2 == VM_PAGE_NULL) {
						if (locked_object) {
							vm_object_unlock(locked_object);
							locked_object = VM_OBJECT_NULL;
						}
						tmp_start_idx = cur_idx;
						abort_run = TRUE;
						continue;
					}
					if (! disconnected) {
						if (m1->pmapped)
							refmod = pmap_disconnect(m1->phys_page);
						else
							refmod = 0;
					}

					/* copy the page's contents */
					pmap_copy_page(m1->phys_page, m2->phys_page);
					/* copy the page's state */
					assert(!VM_PAGE_WIRED(m1));
					assert(!m1->pageout_queue);
					assert(!m1->laundry);
					m2->reference	= m1->reference;
					assert(!m1->gobbled);
					assert(!m1->private);
					m2->no_cache	= m1->no_cache;
					assert(!m1->wanted);
					assert(!m1->fictitious);
					m2->pmapped	= m1->pmapped;	/* should flush cache ? */
					m2->wpmapped	= m1->wpmapped;
					assert(!m1->pageout);
					m2->absent	= m1->absent;
					m2->error	= m1->error;
					m2->dirty	= m1->dirty;
					assert(!m1->cleaning);
					m2->precious	= m1->precious;
					m2->clustered	= m1->clustered;
					assert(!m1->overwriting);
					m2->restart	= m1->restart;
					m2->unusual	= m1->unusual;
					m2->encrypted	= m1->encrypted;
					assert(!m1->encrypted_cleaning);
					m2->cs_validated = m1->cs_validated;
					m2->cs_tainted	= m1->cs_tainted;
					m2->cs_nx	= m1->cs_nx;

					/*
					 * If m1 had really been reusable,
					 * we would have just stolen it, so
					 * let's not propagate its "reusable"
					 * bit and assert that m2 is not
					 * marked as "reusable".
					 */
					// m2->reusable	= m1->reusable;
					assert(!m2->reusable);

					assert(!m1->lopage);
					m2->slid	= m1->slid;
					m2->compressor	= m1->compressor;

					/*
					 * page may need to be flushed if
					 * it is marshalled into a UPL
					 * that is going to be used by a device
					 * that doesn't support coherency
					 */
					m2->written_by_kernel = TRUE;

					/*
					 * make sure we clear the ref/mod state
					 * from the pmap layer... else we risk
					 * inheriting state from the last time
					 * this page was used...
					 */
					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

					if (refmod & VM_MEM_REFERENCED)
						m2->reference = TRUE;
					if (refmod & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(m2, TRUE);
					}
= m1
->offset
;
4539 * completely cleans up the state
4540 * of the page so that it is ready
4541 * to be put onto the free list, or
4542 * for this purpose it looks like it
4543 * just came off of the free list
4545 vm_page_free_prepare(m1
);
4548 * now put the substitute page
4551 vm_page_insert_internal(m2
, locked_object
, offset
, TRUE
, TRUE
, FALSE
);
4553 if (m2
->compressor
) {
4555 m2
->wpmapped
= TRUE
;
4557 PMAP_ENTER(kernel_pmap
, m2
->offset
, m2
,
4558 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, 0, TRUE
);
4564 vm_page_activate(m2
);
4566 vm_page_deactivate(m2
);
4568 PAGE_WAKEUP_DONE(m2
);
4571 assert(!m1
->compressor
);
4574 * completely cleans up the state
4575 * of the page so that it is ready
4576 * to be put onto the free list, or
4577 * for this purpose it looks like it
4578 * just came off of the free list
4580 vm_page_free_prepare(m1
);
4586 m1
->pageq
.next
= (queue_entry_t
) m
;
4587 m1
->pageq
.prev
= NULL
;
4590 if (locked_object
) {
4591 vm_object_unlock(locked_object
);
4592 locked_object
= VM_OBJECT_NULL
;
4595 if (abort_run
== TRUE
) {
4596 if (m
!= VM_PAGE_NULL
) {
4597 vm_page_free_list(m
, FALSE
);
4603 * want the index of the last
4604 * page in this run that was
4605 * successfully 'stolen', so back
4606 * it up 1 for the auto-decrement on use
4607 * and 1 more to bump back over this page
4609 page_idx
= tmp_start_idx
+ 2;
4610 if (page_idx
>= vm_pages_count
) {
4613 page_idx
= last_idx
= 0;
4619 * We didn't find a contiguous range but we didn't
4620 * start from the very first page.
4621 * Start again from the very first page.
4623 RESET_STATE_OF_RUN();
4625 if( flags
& KMA_LOMEM
)
4626 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
4628 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
4630 last_idx
= page_idx
;
4632 lck_mtx_lock(&vm_page_queue_free_lock
);
4634 * reset our free page limit since we
4635 * dropped the lock protecting the vm_page_free_queue
4637 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4641 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
4649 vm_page_gobble_count
+= npages
;
4652 * gobbled pages are also counted as wired pages
4654 vm_page_wire_count
+= npages
;
4656 assert(vm_page_verify_contiguous(m
, npages
));
4659 PAGE_REPLACEMENT_ALLOWED(FALSE
);
4661 vm_page_unlock_queues();
4664 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
4666 tv_end_sec
-= tv_start_sec
;
4667 if (tv_end_usec
< tv_start_usec
) {
4669 tv_end_usec
+= 1000000;
4671 tv_end_usec
-= tv_start_usec
;
4672 if (tv_end_usec
>= 1000000) {
4674 tv_end_sec
-= 1000000;
4676 if (vm_page_find_contig_debug
) {
4677 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4678 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
4679 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
4680 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
);
4685 vm_page_verify_free_lists();
/*
 *  Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
    vm_size_t   size,
    vm_page_t   *list,
    ppnum_t     max_pnum,
    ppnum_t     pnum_mask,
    boolean_t   wire,
    int         flags)
{
    vm_page_t       pages;
    unsigned int    npages;

    if (size % PAGE_SIZE != 0)
        return KERN_INVALID_ARGUMENT;

    npages = (unsigned int) (size / PAGE_SIZE);
    if (npages != size / PAGE_SIZE) {
        /* 32-bit overflow */
        return KERN_INVALID_ARGUMENT;
    }

    /*
     *  Obtain a pointer to a subset of the free
     *  list large enough to satisfy the request;
     *  the region will be physically contiguous.
     */
    pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

    if (pages == VM_PAGE_NULL)
        return KERN_NO_SPACE;
    /*
     * determine need for wakeups
     */
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    VM_CHECK_MEMORYSTATUS;

    /*
     *  The CPM pages should now be available and
     *  ordered by ascending physical address.
     */
    assert(vm_page_verify_contiguous(pages, npages));

    *list = pages;

    return KERN_SUCCESS;
}
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * calling code.
 */

void
vm_page_do_delayed_work(
    vm_object_t object,
    struct vm_page_delayed_work *dwp,
    int         dw_count)
{
    int         j;
    vm_page_t   m;
    vm_page_t   local_free_q = VM_PAGE_NULL;

    /*
     * pageout_scan takes the vm_page_lock_queues first
     * then tries for the object lock... to avoid what
     * is effectively a lock inversion, we'll go to the
     * trouble of taking them in that same order... otherwise
     * if this object contains the majority of the pages resident
     * in the UBC (or a small set of large objects actively being
     * worked on contain the majority of the pages), we could
     * cause the pageout_scan thread to 'starve' in its attempt
     * to find pages to move to the free queue, since it has to
     * successfully acquire the object lock of any candidate page
     * before it can steal/clean it.
     */
    if (!vm_page_trylockspin_queues()) {
        vm_object_unlock(object);

        vm_page_lockspin_queues();

        for (j = 0; ; j++) {
            if (!vm_object_lock_avoid(object) &&
                _vm_object_lock_try(object))
                break;
            vm_page_unlock_queues();
            mutex_pause(j);
            vm_page_lockspin_queues();
        }
    }
    for (j = 0; j < dw_count; j++, dwp++) {

        m = dwp->dw_m;

        if (dwp->dw_mask & DW_vm_pageout_throttle_up)
            vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
        if (dwp->dw_mask & DW_vm_phantom_cache_update)
            vm_phantom_cache_update(m);
#endif
        if (dwp->dw_mask & DW_vm_page_wire)
            vm_page_wire(m);
        else if (dwp->dw_mask & DW_vm_page_unwire) {
            boolean_t   queueit;

            queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

            vm_page_unwire(m, queueit);
        }
        if (dwp->dw_mask & DW_vm_page_free) {
            vm_page_free_prepare_queues(m);

            assert(m->pageq.next == NULL && m->pageq.prev == NULL);
            /*
             * Add this page to our list of reclaimed pages,
             * to be freed later.
             */
            m->pageq.next = (queue_entry_t) local_free_q;
            local_free_q = m;
        } else {
            if (dwp->dw_mask & DW_vm_page_deactivate_internal)
                vm_page_deactivate_internal(m, FALSE);
            else if (dwp->dw_mask & DW_vm_page_activate) {
                if (m->active == FALSE) {
                    vm_page_activate(m);
                }
            }
            else if (dwp->dw_mask & DW_vm_page_speculate)
                vm_page_speculate(m, TRUE);
            else if (dwp->dw_mask & DW_enqueue_cleaned) {
                /*
                 * if we didn't hold the object lock and did this,
                 * we might disconnect the page, then someone might
                 * soft fault it back in, then we would put it on the
                 * cleaned queue, and so we would have a referenced (maybe even dirty)
                 * page on that queue, which we don't want
                 */
                int refmod_state = pmap_disconnect(m->phys_page);

                if ((refmod_state & VM_MEM_REFERENCED)) {
                    /*
                     * this page has been touched since it got cleaned; let's activate it
                     * if it hasn't already been
                     */
                    vm_pageout_enqueued_cleaned++;
                    vm_pageout_cleaned_reactivated++;
                    vm_pageout_cleaned_commit_reactivated++;

                    if (m->active == FALSE)
                        vm_page_activate(m);
                } else {
                    m->reference = FALSE;
                    vm_page_enqueue_cleaned(m);
                }
            }
            else if (dwp->dw_mask & DW_vm_page_lru)
                vm_page_lru(m);
            else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
                if ( !m->pageout_queue)
                    VM_PAGE_QUEUES_REMOVE(m);
            }
            if (dwp->dw_mask & DW_set_reference)
                m->reference = TRUE;
            else if (dwp->dw_mask & DW_clear_reference)
                m->reference = FALSE;

            if (dwp->dw_mask & DW_move_page) {
                if ( !m->pageout_queue) {
                    VM_PAGE_QUEUES_REMOVE(m);

                    assert(m->object != kernel_object);

                    VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
                }
            }
            if (dwp->dw_mask & DW_clear_busy)
                m->busy = FALSE;

            if (dwp->dw_mask & DW_PAGE_WAKEUP)
                PAGE_WAKEUP(m);
        }
    }
    vm_page_unlock_queues();

    if (local_free_q)
        vm_page_free_list(local_free_q, TRUE);

    VM_CHECK_MEMORYSTATUS;
}
kern_return_t
vm_page_alloc_list(
    int         page_count,
    int         flags,
    vm_page_t   *list)
{
    vm_page_t   lo_page_list = VM_PAGE_NULL;
    vm_page_t   mem;
    int         i;

    if ( !(flags & KMA_LOMEM))
        panic("vm_page_alloc_list: called w/o KMA_LOMEM");

    for (i = 0; i < page_count; i++) {

        mem = vm_page_grablo();

        if (mem == VM_PAGE_NULL) {
            if (lo_page_list)
                vm_page_free_list(lo_page_list, FALSE);

            *list = VM_PAGE_NULL;

            return (KERN_RESOURCE_SHORTAGE);
        }
        mem->pageq.next = (queue_entry_t) lo_page_list;
        lo_page_list = mem;
    }
    *list = lo_page_list;

    return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
    page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
    return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
    return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
    return (page->phys_page);
}
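
/*
 * Illustrative sketch (annotation, not part of the original source): walking
 * a page list with the accessors above.  "page_list" stands for any list
 * head threaded through pageq.next, e.g. the list built by
 * vm_page_alloc_list().
 *
 *  vm_page_t p;
 *
 *  for (p = page_list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
 *      printf("offset 0x%llx -> phys page 0x%x\n",
 *             vm_page_get_offset(p), vm_page_get_phys_page(p));
 */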
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void        hibernate_free_range(int, int);
void        hibernate_hash_insert_page(vm_page_t);
uint32_t    hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void        hibernate_rebuild_vm_structs(void);
uint32_t    hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t     hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
    int hibernate_considered;
    int hibernate_reentered_on_q;
    int hibernate_found_dirty;
    int hibernate_skipped_cleaning;
    int hibernate_skipped_transient;
    int hibernate_skipped_precious;
    int hibernate_skipped_external;
    int hibernate_queue_nolock;
    int hibernate_queue_paused;
    int hibernate_throttled;
    int hibernate_throttle_timeout;
    int hibernate_drained;
    int hibernate_drain_timeout;
    int cd_lock_failed;
    int cd_found_precious;
    int cd_found_wired;
    int cd_found_busy;
    int cd_found_unusual;
    int cd_found_cleaning;
    int cd_found_laundry;
    int cd_found_dirty;
    int cd_found_xpmapped;
    int cd_skipped_xpmapped;
    int cd_local_free;
    int cd_total_free;
    int cd_vm_page_wire_count;
    int cd_vm_struct_pages_unneeded;
    int cd_pages;
    int cd_discarded;
    int cd_count_wire;
} hibernate_stats;

/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define HIBERNATE_XPMAPPED_LIMIT        40000
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
    wait_result_t   wait_result;

    vm_page_lock_queues();

    while ( !queue_empty(&q->pgo_pending) ) {

        q->pgo_draining = TRUE;

        assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

        vm_page_unlock_queues();

        wait_result = thread_block(THREAD_CONTINUE_NULL);

        if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
            hibernate_stats.hibernate_drain_timeout++;

            if (q == &vm_pageout_queue_external)
                return (0);

            return (1);
        }
        vm_page_lock_queues();

        hibernate_stats.hibernate_drained++;
    }
    vm_page_unlock_queues();

    return (0);
}
boolean_t hibernate_skip_external = FALSE;

static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
    vm_page_t   m;
    vm_object_t l_object = NULL;
    vm_object_t m_object = NULL;
    int         refmod_state = 0;
    int         try_failed_count = 0;
    int         retval = 0;
    int         current_run = 0;
    struct vm_pageout_queue *iq;
    struct vm_pageout_queue *eq;
    struct vm_pageout_queue *tq;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

    iq = &vm_pageout_queue_internal;
    eq = &vm_pageout_queue_external;

    vm_page_lock_queues();

    while (qcount && !queue_empty(q)) {

        if (current_run++ == 1000) {
            if (hibernate_should_abort()) {
                retval = 1;
                break;
            }
            current_run = 0;
        }

        m = (vm_page_t) queue_first(q);
        m_object = m->object;

        /*
         * check to see if we currently are working
         * with the same object... if so, we've
         * already got the lock
         */
        if (m_object != l_object) {
            /*
             * the object associated with candidate page is
             * different from the one we were just working
             * with... dump the lock if we still own it
             */
            if (l_object != NULL) {
                vm_object_unlock(l_object);
                l_object = NULL;
            }
            /*
             * Try to lock object; since we've already got the
             * page queues lock, we can only 'try' for this one.
             * if the 'try' fails, we need to do a mutex_pause
             * to allow the owner of the object lock a chance to
             * run...
             */
            if ( !vm_object_lock_try_scan(m_object)) {

                if (try_failed_count > 20) {
                    hibernate_stats.hibernate_queue_nolock++;

                    goto reenter_pg_on_q;
                }
                vm_page_unlock_queues();
                mutex_pause(try_failed_count++);
                vm_page_lock_queues();

                hibernate_stats.hibernate_queue_paused++;
                continue;
            } else {
                l_object = m_object;
            }
        }
        if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
            /*
             * page is not to be cleaned
             * put it back on the head of its queue
             */
            if (m->cleaning)
                hibernate_stats.hibernate_skipped_cleaning++;
            else
                hibernate_stats.hibernate_skipped_transient++;

            goto reenter_pg_on_q;
        }
        if (m_object->copy == VM_OBJECT_NULL) {
            if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
                /*
                 * let the normal hibernate image path
                 * deal with these
                 */
                goto reenter_pg_on_q;
            }
        }
        if ( !m->dirty && m->pmapped) {
            refmod_state = pmap_get_refmod(m->phys_page);

            if ((refmod_state & VM_MEM_MODIFIED)) {
                SET_PAGE_DIRTY(m, FALSE);
            }
        } else
            refmod_state = 0;

        if ( !m->dirty) {
            /*
             * page is not to be cleaned
             * put it back on the head of its queue
             */
            if (m->precious)
                hibernate_stats.hibernate_skipped_precious++;

            goto reenter_pg_on_q;
        }

        if (hibernate_skip_external == TRUE && !m_object->internal) {

            hibernate_stats.hibernate_skipped_external++;

            goto reenter_pg_on_q;
        }
        tq = NULL;

        if (m_object->internal) {
            if (VM_PAGE_Q_THROTTLED(iq))
                tq = iq;
        } else if (VM_PAGE_Q_THROTTLED(eq))
            tq = eq;

        if (tq != NULL) {
            wait_result_t   wait_result;
            int             wait_count = 5;

            if (l_object != NULL) {
                vm_object_unlock(l_object);
                l_object = NULL;
            }

            while (retval == 0) {

                tq->pgo_throttled = TRUE;

                assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

                vm_page_unlock_queues();

                wait_result = thread_block(THREAD_CONTINUE_NULL);

                vm_page_lock_queues();

                if (wait_result != THREAD_TIMED_OUT)
                    break;
                if (!VM_PAGE_Q_THROTTLED(tq))
                    break;

                if (hibernate_should_abort())
                    retval = 1;

                if (--wait_count == 0) {

                    hibernate_stats.hibernate_throttle_timeout++;

                    if (tq == eq) {
                        hibernate_skip_external = TRUE;
                        break;
                    }
                    retval = 1;
                }
            }
            if (retval)
                break;

            hibernate_stats.hibernate_throttled++;

            continue;
        }
        /*
         * we've already factored out pages in the laundry which
         * means this page can't be on the pageout queue so it's
         * safe to do the VM_PAGE_QUEUES_REMOVE
         */
        assert(!m->pageout_queue);

        VM_PAGE_QUEUES_REMOVE(m);

        if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
            pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

        vm_pageout_cluster(m, FALSE);

        hibernate_stats.hibernate_found_dirty++;

        goto next_pg;

reenter_pg_on_q:
        queue_remove(q, m, vm_page_t, pageq);
        queue_enter(q, m, vm_page_t, pageq);

        hibernate_stats.hibernate_reentered_on_q++;
next_pg:
        hibernate_stats.hibernate_considered++;

        qcount--;
        try_failed_count = 0;
    }
    if (l_object != NULL) {
        vm_object_unlock(l_object);
        l_object = NULL;
    }
    vm_page_unlock_queues();

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

    return (retval);
}
static int
hibernate_flush_dirty_pages(int pass)
{
    struct vm_speculative_age_q *aq;
    uint32_t    i;

    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++)
            vm_page_reactivate_local(i, TRUE, FALSE);
    }

    for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
        int         qcount;
        vm_page_t   m;

        aq = &vm_page_queue_speculative[i];

        if (queue_empty(&aq->age_q))
            continue;
        qcount = 0;

        vm_page_lockspin_queues();

        queue_iterate(&aq->age_q, m, vm_page_t, pageq) {
            qcount++;
        }
        vm_page_unlock_queues();

        if (qcount) {
            if (hibernate_flush_queue(&aq->age_q, qcount))
                return (1);
        }
    }
    if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
        return (1);
    if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
        return (1);
    if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
        return (1);
    if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
        return (1);

    if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
        vm_compressor_record_warmup_start();

    if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
        if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
            vm_compressor_record_warmup_end();
        return (1);
    }
    if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
        if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
            vm_compressor_record_warmup_end();
        return (1);
    }
    if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
        vm_compressor_record_warmup_end();

    if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
        return (1);

    return (0);
}
void
hibernate_reset_stats()
{
    bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}

int
hibernate_flush_memory()
{
    int retval;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

    hibernate_cleaning_in_progress = TRUE;
    hibernate_skip_external = FALSE;

    if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

        if (COMPRESSED_PAGER_IS_ACTIVE) {

            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

            vm_compressor_flush();

            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
        }
        if (consider_buffer_cache_collect != NULL) {
            unsigned int orig_wire_count;

            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
            orig_wire_count = vm_page_wire_count;

            (void)(*consider_buffer_cache_collect)(1);
            consider_zone_gc(TRUE);

            HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
        }
    }
    hibernate_cleaning_in_progress = FALSE;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

    if (retval && COMPRESSED_PAGER_IS_ACTIVE)
        HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);

    HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
             hibernate_stats.hibernate_considered,
             hibernate_stats.hibernate_reentered_on_q,
             hibernate_stats.hibernate_found_dirty);
    HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
             hibernate_stats.hibernate_skipped_cleaning,
             hibernate_stats.hibernate_skipped_transient,
             hibernate_stats.hibernate_skipped_precious,
             hibernate_stats.hibernate_skipped_external,
             hibernate_stats.hibernate_queue_nolock);
    HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
             hibernate_stats.hibernate_queue_paused,
             hibernate_stats.hibernate_throttled,
             hibernate_stats.hibernate_throttle_timeout,
             hibernate_stats.hibernate_drained,
             hibernate_stats.hibernate_drain_timeout);

    return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++)
    {
        uint32_t last_bit;

        bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
        // set out-of-bound bits at end of bitmap.
        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
        if (last_bit)
            bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
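
/*
 * Worked example (annotation, not part of the original source): for a bank
 * spanning 45 pages, bitmapwords == 2 and last_bit == (45 & 31) == 13.  The
 * final word becomes 0xFFFFFFFF >> 13 == 0x0007FFFF: the 13 most-significant
 * bits (the in-range pages, stored MSB-first) stay zero, i.e. "must be
 * saved", while the 19 out-of-range bit positions are pre-marked as not
 * needing to be saved.
 */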
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
    uint32_t  i;
    vm_page_t m;
    uint64_t  start, end, timeout, nsec;

    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
    clock_get_uptime(&start);

    for (i = 0; i < gobble_count; i++)
    {
        while (VM_PAGE_NULL == (m = vm_page_grab()))
        {
            clock_get_uptime(&end);
            if (end >= timeout)
                break;
            VM_PAGE_WAIT();
        }
        if (!m)
            break;
        m->busy = FALSE;
        vm_page_gobble(m);

        m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
        hibernate_gobble_queue = m;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

void
hibernate_free_gobble_pages(void)
{
    vm_page_t m, next;
    uint32_t  count = 0;

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        next = (vm_page_t) m->pageq.next;
        vm_page_free(m);
        count++;
        m = next;
    }
    hibernate_gobble_queue = VM_PAGE_NULL;

    if (count)
        HIBLOG("Freed %d pages\n", count);
}
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
    vm_object_t object = NULL;
    int         refmod_state;
    boolean_t   discard = FALSE;

    do
    {
        if (m->private)
            panic("hibernate_consider_discard: private");

        if (!vm_object_lock_try(m->object)) {
            if (!preflight) hibernate_stats.cd_lock_failed++;
            break;
        }
        object = m->object;

        if (VM_PAGE_WIRED(m)) {
            if (!preflight) hibernate_stats.cd_found_wired++;
            break;
        }
        if (m->precious) {
            if (!preflight) hibernate_stats.cd_found_precious++;
            break;
        }
        if (m->busy || !object->alive) {
            /*
             * Somebody is playing with this page.
             */
            if (!preflight) hibernate_stats.cd_found_busy++;
            break;
        }
        if (m->absent || m->unusual || m->error) {
            /*
             * If it's unusual in any way, ignore it
             */
            if (!preflight) hibernate_stats.cd_found_unusual++;
            break;
        }
        if (m->cleaning) {
            if (!preflight) hibernate_stats.cd_found_cleaning++;
            break;
        }
        if (m->laundry) {
            if (!preflight) hibernate_stats.cd_found_laundry++;
            break;
        }
        if (!m->dirty)
        {
            refmod_state = pmap_get_refmod(m->phys_page);

            if (refmod_state & VM_MEM_REFERENCED)
                m->reference = TRUE;
            if (refmod_state & VM_MEM_MODIFIED) {
                SET_PAGE_DIRTY(m, FALSE);
            }
        }

        /*
         * If it's clean or purgeable we can discard the page on wakeup.
         */
        discard = (!m->dirty)
                  || (VM_PURGABLE_VOLATILE == object->purgable)
                  || (VM_PURGABLE_EMPTY == object->purgable);

        if (discard == FALSE) {
            if (!preflight)
                hibernate_stats.cd_found_dirty++;
        } else if (m->xpmapped && m->reference && !object->internal) {
            if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
                if (!preflight)
                    hibernate_stats.cd_found_xpmapped++;
                discard = FALSE;
            } else {
                if (!preflight)
                    hibernate_stats.cd_skipped_xpmapped++;
            }
        }
    }
    while (FALSE);

    if (object)
        vm_object_unlock(object);

    return (discard);
}
static void
hibernate_discard_page(vm_page_t m)
{
    if (m->absent || m->unusual || m->error)
        /*
         * If it's unusual in any way, ignore
         */
        return;

#if MACH_ASSERT || DEBUG
    vm_object_t object = m->object;
    if (!vm_object_lock_try(m->object))
        panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
    /* No need to lock page queue for token delete, hibernate_vm_unlock()
       makes sure these locks are uncontended before sleep */
#endif /* MACH_ASSERT || DEBUG */

    if (m->pmapped == TRUE)
    {
        __unused int refmod_state = pmap_disconnect(m->phys_page);
    }

    if (m->laundry)
        panic("hibernate_discard_page(%p) laundry", m);
    if (m->private)
        panic("hibernate_discard_page(%p) private", m);
    if (m->fictitious)
        panic("hibernate_discard_page(%p) fictitious", m);

    if (VM_PURGABLE_VOLATILE == m->object->purgable)
    {
        /* object should be on a queue */
        assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
        purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
        assert(old_queue);
        if (m->object->purgeable_when_ripe) {
            vm_purgeable_token_delete_first(old_queue);
        }
        m->object->purgable = VM_PURGABLE_EMPTY;

        /*
         * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
         * accounted in the "volatile" ledger, so no change here.
         * We have to update vm_page_purgeable_count, though, since we're
         * effectively purging this object.
         */
        unsigned int delta;
        assert(m->object->resident_page_count >= m->object->wired_page_count);
        delta = (m->object->resident_page_count - m->object->wired_page_count);
        assert(vm_page_purgeable_count >= delta);
        OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
    }

    vm_page_free(m);

#if MACH_ASSERT || DEBUG
    vm_object_unlock(object);
#endif /* MACH_ASSERT || DEBUG */
}
/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
    vm_object_lock(compressor_object);
    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);

    if (vm_page_local_q) {
        uint32_t i;
        for (i = 0; i < vm_page_local_q_count; i++) {
            struct vpl *lq;
            lq = &vm_page_local_q[i].vpl_un.vpl;
            VPL_LOCK(&lq->vpl_lock);
        }
    }
}

void
hibernate_vm_unlock_queues(void)
{
    if (vm_page_local_q) {
        uint32_t i;
        for (i = 0; i < vm_page_local_q_count; i++) {
            struct vpl *lq;
            lq = &vm_page_local_q[i].vpl_un.vpl;
            VPL_UNLOCK(&lq->vpl_lock);
        }
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);
    vm_page_unlock_queues();
    vm_object_unlock(compressor_object);
}
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
                           hibernate_page_list_t * page_list_wired,
                           hibernate_page_list_t * page_list_pal,
                           boolean_t preflight,
                           boolean_t will_discard,
                           uint32_t * pagesOut)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t  pages = page_list->page_count;
    uint32_t  count_anonymous = 0, count_throttled = 0, count_compressor = 0;
    uint32_t  count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
    uint32_t  count_wire = pages;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_cleaned   = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_speculative = 0;
    uint32_t  count_discard_vm_struct_pages = 0;
    uint32_t  i;
    uint32_t  bank;
    hibernate_bitmap_t * bitmap;
    hibernate_bitmap_t * bitmap_wired;
    boolean_t discard_all;
    boolean_t discard;

    HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);

    if (preflight) {
        page_list       = NULL;
        page_list_wired = NULL;
        page_list_pal   = NULL;
        discard_all     = FALSE;
    } else {
        discard_all     = will_discard;
    }

#if MACH_ASSERT || DEBUG
    if (!preflight)
    {
        vm_page_lock_queues();
        if (vm_page_local_q) {
            for (i = 0; i < vm_page_local_q_count; i++) {
                struct vpl *lq;
                lq = &vm_page_local_q[i].vpl_un.vpl;
                VPL_LOCK(&lq->vpl_lock);
            }
        }
    }
#endif /* MACH_ASSERT || DEBUG */

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

    clock_get_uptime(&start);

    if (!preflight) {
        hibernate_page_list_zero(page_list);
        hibernate_page_list_zero(page_list_wired);
        hibernate_page_list_zero(page_list_pal);

        hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
        hibernate_stats.cd_pages = pages;
    }

    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++)
            vm_page_reactivate_local(i, TRUE, !preflight);
    }

    if (preflight) {
        vm_object_lock(compressor_object);
        vm_page_lock_queues();
        lck_mtx_lock(&vm_page_queue_free_lock);
    }

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        pages--;
        count_wire--;
        if (!preflight) {
            hibernate_page_bitset(page_list,       TRUE, m->phys_page);
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        }
        m = (vm_page_t) m->pageq.next;
    }

    if (!preflight) for( i = 0; i < real_ncpus; i++ )
    {
        if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
        {
            for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
            {
                pages--;
                count_wire--;
                hibernate_page_bitset(page_list,       TRUE, m->phys_page);
                hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

                hibernate_stats.cd_local_free++;
                hibernate_stats.cd_total_free++;
            }
        }
    }

    for( i = 0; i < vm_colors; i++ )
    {
        queue_iterate(&vm_page_queue_free[i], m, vm_page_t, pageq)
        {
            pages--;
            count_wire--;
            if (!preflight) {
                hibernate_page_bitset(page_list,       TRUE, m->phys_page);
                hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

                hibernate_stats.cd_total_free++;
            }
        }
    }

    queue_iterate(&vm_lopage_queue_free, m, vm_page_t, pageq)
    {
        pages--;
        count_wire--;
        if (!preflight) {
            hibernate_page_bitset(page_list,       TRUE, m->phys_page);
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

            hibernate_stats.cd_total_free++;
        }
    }

    m = (vm_page_t) queue_first(&vm_page_queue_throttled);
    while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        discard = FALSE;
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m, preflight))
        {
            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
            count_discard_inactive++;
            discard = discard_all;
        }
        else
            count_throttled++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

        if (discard) hibernate_discard_page(m);
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        discard = FALSE;
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m, preflight))
        {
            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            discard = discard_all;
        }
        else
            count_anonymous++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        if (discard) hibernate_discard_page(m);
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        discard = FALSE;
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m, preflight))
        {
            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_cleaned++;
            discard = discard_all;
        }
        else
            count_cleaned++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        if (discard) hibernate_discard_page(m);
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        discard = FALSE;
        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
         && hibernate_consider_discard(m, preflight))
        {
            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
            discard = discard_all;
        }
        else
            count_active++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        if (discard) hibernate_discard_page(m);
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        discard = FALSE;
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m, preflight))
        {
            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            discard = discard_all;
        }
        else
            count_inactive++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        if (discard) hibernate_discard_page(m);
        m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
        while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
        {
            next = (vm_page_t) m->pageq.next;
            discard = FALSE;
            if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
             && hibernate_consider_discard(m, preflight))
            {
                if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
                count_discard_speculative++;
                discard = discard_all;
            }
            else
                count_speculative++;
            count_wire--;
            if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
            if (discard) hibernate_discard_page(m);
            m = next;
        }
    }

    queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
    {
        count_compressor++;
        count_wire--;
        if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    if (preflight == FALSE && discard_all == TRUE) {
        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

        HIBLOG("hibernate_teardown started\n");
        count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
        HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

        pages -= count_discard_vm_struct_pages;
        count_wire -= count_discard_vm_struct_pages;

        hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
    }

    if (!preflight) {
        // pull wired from hibernate_bitmap
        bitmap = &page_list->bank_bitmap[0];
        bitmap_wired = &page_list_wired->bank_bitmap[0];
        for (bank = 0; bank < page_list->bank_count; bank++)
        {
            for (i = 0; i < bitmap->bitmapwords; i++)
                bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
            bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
            bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
        }
    }

    // machine dependent adjustments
    hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

    if (!preflight) {
        hibernate_stats.cd_count_wire = count_wire;
        hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
                count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

    HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
           pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
           discard_all ? "did" : "could",
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

    if (hibernate_stats.cd_skipped_xpmapped)
        HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);

    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

    if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
    if (!preflight)
    {
        if (vm_page_local_q) {
            for (i = 0; i < vm_page_local_q_count; i++) {
                struct vpl *lq;
                lq = &vm_page_local_q[i].vpl_un.vpl;
                VPL_UNLOCK(&lq->vpl_lock);
            }
        }
        vm_page_unlock_queues();
    }
#endif /* MACH_ASSERT || DEBUG */

    if (preflight) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        vm_page_unlock_queues();
        vm_object_unlock(compressor_object);
    }

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t  i;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_cleaned   = 0;
    uint32_t  count_discard_speculative = 0;

#if MACH_ASSERT || DEBUG
    vm_page_lock_queues();
    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++) {
            struct vpl *lq;
            lq = &vm_page_local_q[i].vpl_un.vpl;
            VPL_LOCK(&lq->vpl_lock);
        }
    }
#endif /* MACH_ASSERT || DEBUG */

    clock_get_uptime(&start);

    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
        while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
        {
            next = (vm_page_t) m->pageq.next;
            if (hibernate_page_bittst(page_list, m->phys_page))
            {
                count_discard_speculative++;
                hibernate_discard_page(m);
            }
            m = next;
        }
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_cleaned++;
            hibernate_discard_page(m);
        }
        m = next;
    }

#if MACH_ASSERT || DEBUG
    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++) {
            struct vpl *lq;
            lq = &vm_page_local_q[i].vpl_un.vpl;
            VPL_UNLOCK(&lq->vpl_lock);
        }
    }
    vm_page_unlock_queues();
#endif /* MACH_ASSERT || DEBUG */

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
           nsec / 1000000ULL,
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
boolean_t       hibernate_paddr_map_inited = FALSE;
boolean_t       hibernate_rebuild_needed = FALSE;
unsigned int    hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t       hibernate_rebuild_hash_list = NULL;

unsigned int    hibernate_teardown_found_tabled_pages = 0;
unsigned int    hibernate_teardown_found_created_pages = 0;
unsigned int    hibernate_teardown_found_free_pages = 0;
unsigned int    hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
    struct ppnum_mapping    *ppnm_next;
    ppnum_t                 ppnm_base_paddr;
    unsigned int            ppnm_sindx;
    unsigned int            ppnm_eindx;
};

struct ppnum_mapping    *ppnm_head;
struct ppnum_mapping    *ppnm_last_found = NULL;


void
hibernate_create_paddr_map()
{
    unsigned int    i;
    ppnum_t         next_ppnum_in_run = 0;
    struct ppnum_mapping *ppnm = NULL;

    if (hibernate_paddr_map_inited == FALSE) {

        for (i = 0; i < vm_pages_count; i++) {

            if (ppnm)
                ppnm->ppnm_eindx = i;

            if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

                ppnm = kalloc(sizeof(struct ppnum_mapping));

                ppnm->ppnm_next = ppnm_head;
                ppnm_head = ppnm;

                ppnm->ppnm_sindx = i;
                ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
            }
            next_ppnum_in_run = vm_pages[i].phys_page + 1;
        }
        ppnm->ppnm_eindx++;

        hibernate_paddr_map_inited = TRUE;
    }
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
    struct ppnum_mapping *ppnm = NULL;

    ppnm = ppnm_last_found;

    if (ppnm) {
        if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
            goto done;
    }
    for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

        if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
            ppnm_last_found = ppnm;
            break;
        }
    }
    if (ppnm == NULL)
        panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
    return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
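
/*
 * Worked example (annotation, not part of the original source): if a
 * ppnum_mapping covers compacted indices [ppnm_sindx, ppnm_eindx) == [100, 200)
 * with ppnm_base_paddr == 0x8000, then hibernate_lookup_paddr(150) returns
 * 0x8000 + (150 - 100) == 0x8032, i.e. the physical page 50 pages into that run.
 */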
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
    addr64_t        saddr_aligned;
    addr64_t        eaddr_aligned;
    addr64_t        addr;
    ppnum_t         paddr;
    unsigned int    mark_as_unneeded_pages = 0;

    saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
    eaddr_aligned = eaddr & ~PAGE_MASK_64;

    for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

        paddr = pmap_find_phys(kernel_pmap, addr);

        assert(paddr);

        hibernate_page_bitset(page_list,       TRUE, paddr);
        hibernate_page_bitset(page_list_wired, TRUE, paddr);

        mark_as_unneeded_pages++;
    }
    return (mark_as_unneeded_pages);
}
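
/*
 * Worked example (annotation, not part of the original source): with 4KB
 * pages, a range saddr == 0x1800, eaddr == 0x5400 is trimmed to
 * saddr_aligned == 0x2000 and eaddr_aligned == 0x5000, so only the fully
 * contained pages at 0x2000, 0x3000 and 0x4000 are marked as not needing
 * to be saved; the partial pages at either end are left alone.
 */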
void
hibernate_hash_insert_page(vm_page_t mem)
{
    vm_page_bucket_t *bucket;
    int     hash_id;

    assert(mem->hashed);
    assert(mem->object);
    assert(mem->offset != (vm_object_offset_t) -1);

    /*
     * Insert it into the object/offset hash table
     */
    hash_id = vm_page_hash(mem->object, mem->offset);
    bucket = &vm_page_buckets[hash_id];

    mem->next_m = bucket->page_list;
    bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
void
hibernate_free_range(int sindx, int eindx)
{
    vm_page_t       mem;
    unsigned int    color;

    while (sindx < eindx) {
        mem = &vm_pages[sindx];

        vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

        mem->lopage = FALSE;
        mem->free = TRUE;

        color = mem->phys_page & vm_color_mask;
        queue_enter_first(&vm_page_queue_free[color], mem, vm_page_t, pageq);

        vm_page_free_count++;

        sindx++;
    }
}
extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
    int             cindx, sindx, eindx;
    vm_page_t       mem, tmem, mem_next;
    AbsoluteTime    startTime, endTime;
    uint64_t        nsec;

    if (hibernate_rebuild_needed == FALSE)
        return;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
    HIBLOG("hibernate_rebuild started\n");

    clock_get_uptime(&startTime);

    hibernate_rebuild_pmap_structs();

    bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
    eindx = vm_pages_count;

    for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

        mem = &vm_pages[cindx];
        /*
         * hibernate_teardown_vm_structs leaves the location where
         * this vm_page_t must be located in "next".
         */
        tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
        mem->next_m = VM_PAGE_PACK_PTR(NULL);

        sindx = (int)(tmem - &vm_pages[0]);

        if (mem != tmem) {
            /*
             * this vm_page_t was moved by hibernate_teardown_vm_structs,
             * so move it back to its real location
             */
            *tmem = *mem;
            mem = tmem;
        }
        if (mem->hashed)
            hibernate_hash_insert_page(mem);
        /*
         * the 'hole' between this vm_page_t and the previous
         * vm_page_t we moved needs to be initialized as
         * a range of free vm_page_t's
         */
        hibernate_free_range(sindx + 1, eindx);

        eindx = sindx;
    }
    if (sindx)
        hibernate_free_range(0, sindx);

    assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

    /*
     * process the list of vm_page_t's that were entered in the hash,
     * but were not located in the vm_pages array... these are
     * vm_page_t's that were created on the fly (i.e. fictitious)
     */
    for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
        mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

        mem->next_m = VM_PAGE_PACK_PTR(NULL);
        hibernate_hash_insert_page(mem);
    }
    hibernate_rebuild_hash_list = NULL;

    clock_get_uptime(&endTime);
    SUB_ABSOLUTETIME(&endTime, &startTime);
    absolutetime_to_nanoseconds(endTime, &nsec);

    HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

    hibernate_rebuild_needed = FALSE;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
    unsigned int    i;
    unsigned int    compact_target_indx;
    vm_page_t       mem, mem_next;
    vm_page_bucket_t *bucket;
    unsigned int    mark_as_unneeded_pages = 0;
    unsigned int    unneeded_vm_page_bucket_pages = 0;
    unsigned int    unneeded_vm_pages_pages = 0;
    unsigned int    unneeded_pmap_pages = 0;
    addr64_t        start_of_unneeded = 0;
    addr64_t        end_of_unneeded = 0;


    if (hibernate_should_abort())
        return (0);

    HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
           vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
           vm_page_cleaned_count, compressor_object->resident_page_count);

    for (i = 0; i < vm_page_bucket_count; i++) {

        bucket = &vm_page_buckets[i];

        for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
            assert(mem->hashed);

            mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

            if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
                mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
                hibernate_rebuild_hash_list = mem;
            }
        }
    }
    unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
    mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

    hibernate_teardown_vm_page_free_count = vm_page_free_count;

    compact_target_indx = 0;

    for (i = 0; i < vm_pages_count; i++) {

        mem = &vm_pages[i];

        if (mem->free) {
            unsigned int color;

            assert(!mem->lopage);

            color = mem->phys_page & vm_color_mask;

            queue_remove(&vm_page_queue_free[color], mem, vm_page_t, pageq);

            mem->pageq.next = NULL;
            mem->pageq.prev = NULL;

            vm_page_free_count--;

            hibernate_teardown_found_free_pages++;

            if ( !vm_pages[compact_target_indx].free)
                compact_target_indx = i;
        } else {
            /*
             * record this vm_page_t's original location
             * we need this even if it doesn't get moved
             * as an indicator to the rebuild function that
             * we don't have to move it
             */
            mem->next_m = VM_PAGE_PACK_PTR(mem);

            if (vm_pages[compact_target_indx].free) {
                /*
                 * we've got a hole to fill, so
                 * move this vm_page_t to its new home
                 */
                vm_pages[compact_target_indx] = *mem;
                mem->free = TRUE;

                hibernate_teardown_last_valid_compact_indx = compact_target_indx;
                compact_target_indx++;
            } else
                hibernate_teardown_last_valid_compact_indx = i;
        }
    }
    unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
                                                         (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
    mark_as_unneeded_pages += unneeded_vm_pages_pages;

    hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

    if (start_of_unneeded) {
        unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
        mark_as_unneeded_pages += unneeded_pmap_pages;
    }
    HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

    hibernate_rebuild_needed = TRUE;

    return (mark_as_unneeded_pages);
}


#endif /* HIBERNATION */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *  Routine:    vm_page_info
 *  Purpose:
 *      Return information about the global VP table.
 *      Fills the buffer with as much information as possible
 *      and returns the desired size of the buffer.
 *  Conditions:
 *      Nothing locked.  The caller should provide
 *      possibly-pageable memory.
 */

unsigned int
vm_page_info(
    hash_info_bucket_t *info,
    unsigned int count)
{
    unsigned int i;
    lck_spin_t   *bucket_lock;

    if (vm_page_bucket_count < count)
        count = vm_page_bucket_count;

    for (i = 0; i < count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];
        unsigned int bucket_count = 0;
        vm_page_t m;

        bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
        lck_spin_lock(bucket_lock);

        for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
            bucket_count++;

        lck_spin_unlock(bucket_lock);

        /* don't touch pageable memory while holding locks */
        info[i].hib_count = bucket_count;
    }

    return vm_page_bucket_count;
}
#endif /* MACH_VM_DEBUG */
#if VM_PAGE_BUCKETS_CHECK
void
vm_page_buckets_check(void)
{
    unsigned int i;
    vm_page_t p;
    unsigned int p_hash;
    vm_page_bucket_t *bucket;
    lck_spin_t  *bucket_lock;

    if (!vm_page_buckets_check_ready) {
        return;
    }

#if HIBERNATION
    if (hibernate_rebuild_needed ||
        hibernate_rebuild_hash_list) {
        panic("BUCKET_CHECK: hibernation in progress: "
              "rebuild_needed=%d rebuild_hash_list=%p\n",
              hibernate_rebuild_needed,
              hibernate_rebuild_hash_list);
    }
#endif /* HIBERNATION */

#if VM_PAGE_FAKE_BUCKETS
    char *cp;
    for (cp = (char *) vm_page_fake_buckets_start;
         cp < (char *) vm_page_fake_buckets_end;
         cp++) {
        if (*cp != 0x5a) {
            panic("BUCKET_CHECK: corruption at %p in fake buckets "
                  "[0x%llx:0x%llx]\n",
                  cp,
                  (uint64_t) vm_page_fake_buckets_start,
                  (uint64_t) vm_page_fake_buckets_end);
        }
    }
#endif /* VM_PAGE_FAKE_BUCKETS */

    for (i = 0; i < vm_page_bucket_count; i++) {
        bucket = &vm_page_buckets[i];
        if (!bucket->page_list) {
            continue;
        }

        bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
        lck_spin_lock(bucket_lock);
        p = VM_PAGE_UNPACK_PTR(bucket->page_list);
        while (p != VM_PAGE_NULL) {
            if (!p->hashed) {
                panic("BUCKET_CHECK: page %p (%p,0x%llx) "
                      "hash %d in bucket %d at %p "
                      "is not hashed\n",
                      p, p->object, p->offset,
                      p_hash, i, bucket);
            }
            p_hash = vm_page_hash(p->object, p->offset);
            if (p_hash != i) {
                panic("BUCKET_CHECK: corruption in bucket %d "
                      "at %p: page %p object %p offset 0x%llx "
                      "hash %d\n",
                      i, bucket, p, p->object, p->offset,
                      p_hash);
            }
            p = VM_PAGE_UNPACK_PTR(p->next_m);
        }
        lck_spin_unlock(bucket_lock);
    }

//  printf("BUCKET_CHECK: checked buckets\n");
}
#endif /* VM_PAGE_BUCKETS_CHECK */