/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>

#include <kern/counter.h>
#include <kern/host_statistics.h>
#include <kern/sched_prim.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc_internal.h>
#include <kern/ledger.h>

#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>                 /* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <mach_debug/zone_info.h>

#include <pexpert/pexpert.h>
#include <san/kasan.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>
#if defined (__x86_64__)
#include <i386/misc_protos.h>
#endif

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#include <machine/pal_hibernate.h>
#endif /* HIBERNATION */

#include <sys/kdebug.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif
#if defined(__arm64__)
#include <arm/cpu_internal.h>
#endif /* defined(__arm64__) */

#if MACH_ASSERT

#define ASSERT_PMAP_FREE(mem) pmap_assert_free(VM_PAGE_GET_PHYS_PAGE(mem))

#else /* MACH_ASSERT */

#define ASSERT_PMAP_FREE(mem) /* nothing */

#endif /* MACH_ASSERT */
extern boolean_t vm_pageout_running;
extern thread_t  vm_pageout_scan_thread;
extern boolean_t vps_dynamic_priority_enabled;

char	vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char	vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char	vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char	vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];

#if CONFIG_SECLUDED_MEMORY
struct vm_page_secluded_data vm_page_secluded;
#endif /* CONFIG_SECLUDED_MEMORY */

#if DEVELOPMENT || DEBUG
extern struct memory_object_pager_ops shared_region_pager_ops;
unsigned int shared_region_pagers_resident_count = 0;
unsigned int shared_region_pagers_resident_peak = 0;
#endif /* DEVELOPMENT || DEBUG */

int		PERCPU_DATA(start_color);
vm_page_t	PERCPU_DATA(free_pages);
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

boolean_t	hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
                                                          * Updated and checked behind the vm_page_queues_lock. */

static void	vm_page_free_prepare(vm_page_t page);
static vm_page_t vm_page_grab_fictitious_common(ppnum_t, boolean_t);

static void vm_tag_init(void);

/* for debugging purposes */
SECURITY_READ_ONLY_EARLY(uint32_t) vm_packed_from_vm_pages_array_mask =
    VM_PAGE_PACKED_FROM_ARRAY;
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) vm_page_packing_params =
    VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR);
/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */
vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	 or VP, table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if MACH_PAGE_HASH_STATS
	int		cur_count;	/* current count */
	int		hi_count;	/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;

#define BUCKETS_PER_LOCK        16

SECURITY_READ_ONLY_LATE(vm_page_bucket_t *) vm_page_buckets;		/* Array of buckets */
SECURITY_READ_ONLY_LATE(unsigned int)       vm_page_bucket_count = 0;	/* How big is array? */
SECURITY_READ_ONLY_LATE(unsigned int)       vm_page_hash_mask;		/* Mask for hash function */
SECURITY_READ_ONLY_LATE(unsigned int)       vm_page_hash_shift;		/* Shift for hash function */
SECURITY_READ_ONLY_LATE(uint32_t)           vm_page_bucket_hash;	/* Basic bucket hash */
SECURITY_READ_ONLY_LATE(unsigned int)       vm_page_bucket_lock_count = 0;	/* How big is array of locks? */
#ifndef VM_TAG_ACTIVE_UPDATE
#error VM_TAG_ACTIVE_UPDATE
#endif
#ifndef VM_MAX_TAG_ZONES
#error VM_MAX_TAG_ZONES
#endif

SECURITY_READ_ONLY_LATE(bool) vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
SECURITY_READ_ONLY_LATE(lck_spin_t *) vm_page_bucket_locks;

vm_allocation_site_t		vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
vm_allocation_site_t *		vm_allocation_sites[VM_MAX_TAG_VALUE];
#if VM_MAX_TAG_ZONES
static vm_allocation_zone_total_t **vm_allocation_zone_totals;
#endif /* VM_MAX_TAG_ZONES */

vm_tag_t vm_allocation_tag_highest;
#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t   vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth) {
				maxdepth = vm_page_buckets[i].hi_count;
			}
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
	    numbuckets, 100 * numbuckets / vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
	    vm_page_bucket_count - numbuckets,
	    100 * (vm_page_bucket_count - numbuckets) / vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
	    highsum / vm_page_bucket_count,
	    highsum % vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
#if defined(__arm__) || defined(__arm64__)
vm_size_t	page_size;
vm_size_t	page_mask;
int		page_shift;
#else
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
#endif

SECURITY_READ_ONLY_LATE(vm_page_t) vm_pages = VM_PAGE_NULL;
SECURITY_READ_ONLY_LATE(vm_page_t) vm_page_array_beginning_addr;
vm_page_t	vm_page_array_ending_addr;

unsigned int	vm_pages_count = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;

struct vm_page_queue_free_head {
	vm_page_queue_head_t	qhead;
} VM_PAGE_PACKED_ALIGNED;

struct vm_page_queue_free_head	vm_page_queue_free[MAX_COLORS];

unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
#if CONFIG_SECLUDED_MEMORY
unsigned int	vm_page_free_wanted_secluded;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int	vm_page_free_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
SECURITY_READ_ONLY_LATE(zone_t) vm_page_zone;
vm_locks_array_t vm_page_locks;

LCK_ATTR_DECLARE(vm_page_lck_attr, 0, 0);
LCK_GRP_DECLARE(vm_page_lck_grp_free, "vm_page_free");
LCK_GRP_DECLARE(vm_page_lck_grp_queue, "vm_page_queue");
LCK_GRP_DECLARE(vm_page_lck_grp_local, "vm_page_queue_local");
LCK_GRP_DECLARE(vm_page_lck_grp_purge, "vm_page_purge");
LCK_GRP_DECLARE(vm_page_lck_grp_alloc, "vm_page_alloc");
LCK_GRP_DECLARE(vm_page_lck_grp_bucket, "vm_page_bucket");
LCK_SPIN_DECLARE_ATTR(vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
LCK_SPIN_DECLARE_ATTR(vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);

unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vpl	*__zpercpu vm_page_local_q;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
const ppnum_t vm_page_guard_addr = (ppnum_t) -2;

/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
vm_page_queue_head_t	vm_page_queue_active VM_PAGE_PACKED_ALIGNED;
vm_page_queue_head_t	vm_page_queue_inactive VM_PAGE_PACKED_ALIGNED;
#if CONFIG_SECLUDED_MEMORY
vm_page_queue_head_t	vm_page_queue_secluded VM_PAGE_PACKED_ALIGNED;
#endif /* CONFIG_SECLUDED_MEMORY */
vm_page_queue_head_t	vm_page_queue_anonymous VM_PAGE_PACKED_ALIGNED;	/* inactive memory queue for anonymous pages */
vm_page_queue_head_t	vm_page_queue_throttled VM_PAGE_PACKED_ALIGNED;

queue_head_t	vm_objects_wired;

void vm_update_darkwake_mode(boolean_t);
#if CONFIG_BACKGROUND_QUEUE
vm_page_queue_head_t	vm_page_queue_background VM_PAGE_PACKED_ALIGNED;
uint32_t	vm_page_background_target;
uint32_t	vm_page_background_target_snapshot;
uint32_t	vm_page_background_count;
uint64_t	vm_page_background_promoted_count;

uint32_t	vm_page_background_internal_count;
uint32_t	vm_page_background_external_count;

uint32_t	vm_page_background_mode;
uint32_t	vm_page_background_exclude_external;
#endif /* CONFIG_BACKGROUND_QUEUE */
unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_kernelcache_count;
#if CONFIG_SECLUDED_MEMORY
unsigned int	vm_page_secluded_count;
unsigned int	vm_page_secluded_count_free;
unsigned int	vm_page_secluded_count_inuse;
unsigned int	vm_page_secluded_count_over_target;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;

unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_on_boot = 0;
unsigned int	vm_page_stolen_count = 0;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_kern_lpage_count = 0;

uint64_t	booter_size;	/* external so it can be found in core dumps */

#define VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif /* DEVELOPMENT || DEBUG */

vm_page_queue_head_t	vm_page_queue_cleaned VM_PAGE_PACKED_ALIGNED;

unsigned int	vm_page_cleaned_count = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = PPNUM_MAX;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
unsigned int	vm_page_inactive_target = 0;
#if CONFIG_SECLUDED_MEMORY
unsigned int	vm_page_secluded_target = 0;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_free_reserved = 0;

/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */
boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	if ((page_mask & page_size) != 0) {
		panic("vm_set_page_size: page size not a power of two");
	}

	for (page_shift = 0;; page_shift++) {
		if ((1U << page_shift) == page_size) {
			break;
		}
	}
}
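/*
 * Illustrative note (added for exposition, not part of the original source):
 * with a 16 KB configuration, page_size = 16384 and page_mask = 0x3fff, so
 * (page_mask & page_size) == 0 and the loop above stops at page_shift = 14,
 * since (1U << 14) == 16384. A 4 KB configuration yields page_shift = 12
 * the same way.
 */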
#if defined (__x86_64__)

#define MAX_CLUMP_SIZE      16
#define DEFAULT_CLUMP_SIZE  4

unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;

#if DEVELOPMENT || DEBUG
unsigned long vm_clump_stats[MAX_CLUMP_SIZE + 1];
unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;

static inline void
vm_clump_update_stats(unsigned int c)
{
	assert(c <= vm_clump_size);
	if (c > 0 && c <= vm_clump_size) {
		vm_clump_stats[c] += c;
	}
	vm_clump_allocs += c;
}
#endif /* if DEVELOPMENT || DEBUG */

/* Called once to setup the VM clump knobs */
static void
vm_page_setup_clump( void )
{
	unsigned int override, n;

	vm_clump_size = DEFAULT_CLUMP_SIZE;
	if (PE_parse_boot_argn("clump_size", &override, sizeof(override))) {
		vm_clump_size = override;
	}

	if (vm_clump_size > MAX_CLUMP_SIZE) {
		panic("vm_page_setup_clump:: clump_size is too large!");
	}
	if (vm_clump_size < 1) {
		panic("vm_page_setup_clump:: clump_size must be >= 1");
	}
	if ((vm_clump_size & (vm_clump_size - 1)) != 0) {
		panic("vm_page_setup_clump:: clump_size must be a power of 2");
	}

	vm_clump_promote_threshold = vm_clump_size;
	vm_clump_mask = vm_clump_size - 1;
	for (vm_clump_shift = 0, n = vm_clump_size; n > 1; n >>= 1, vm_clump_shift++) {
		;
	}

#if DEVELOPMENT || DEBUG
	bzero(vm_clump_stats, sizeof(vm_clump_stats));
	vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
#endif  /* if DEVELOPMENT || DEBUG */
}

#endif	/* #if defined (__x86_64__) */
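/*
 * Illustrative sketch (added for exposition, not in the original source):
 * with the default clump_size of 4, vm_page_setup_clump() above ends up with
 *
 *   vm_clump_mask              == 3   (clump_size - 1)
 *   vm_clump_shift             == 2   (log2 of clump_size)
 *   vm_clump_promote_threshold == 4
 *
 * A hypothetical boot-arg of clump_size=8 would instead give mask 7, shift 3.
 */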
#define COLOR_GROUPS_TO_STEAL   4

/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
	unsigned int	n, override;

#if defined (__x86_64__)
	/* adjust #colors because we need to color outside the clump boundary */
	vm_cache_geometry_colors >>= vm_clump_shift;
#endif
	if (PE_parse_boot_argn("colors", &override, sizeof(override))) {	/* colors specified as a boot-arg? */
		n = override;
	} else if (vm_cache_geometry_colors) {		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	} else {
		n = DEFAULT_COLORS;			/* use default if all else fails */
	}

	if (n > MAX_COLORS) {
		n = MAX_COLORS;
	}

	/* the count must be a power of 2  */
	if ((n & (n - 1)) != 0) {
		n = DEFAULT_COLORS;			/* use default if all else fails */
	}

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;

#if defined (__x86_64__)
	/* adjust for reduction in colors due to clumping and multiple cores */
	vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
#endif
}
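/*
 * Illustrative sketch (added for exposition, not in the original source):
 * if vm_page_set_colors() settles on vm_colors = 8, then vm_color_mask = 7
 * and vm_free_magazine_refill_limit = 8 * COLOR_GROUPS_TO_STEAL = 32.
 * On x86_64 with, say, vm_clump_size = 4 and real_ncpus = 8, the limit is
 * further scaled to 32 * 4 * 8 = 1024 pages.
 */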
/*
 * During single threaded early boot we don't initialize all pages.
 * This avoids some delay during boot. They'll be initialized and
 * added to the free list as needed or after we are multithreaded by
 * what becomes the pageout thread.
 */
static boolean_t fill = FALSE;
static unsigned int fillval;
uint_t vm_delayed_count = 0;		/* when non-zero, indicates we may have more pages to init */
ppnum_t delay_above_pnum = PPNUM_MAX;

/*
 * For x86 first 8 Gig initializes quickly and gives us lots of lowmem + mem above to start off with.
 * If ARM ever uses delayed page initialization, this value may need to be quite different.
 */
#define DEFAULT_DELAY_ABOVE_PHYS_GB (8)

/*
 * When we have to dip into more delayed pages due to low memory, free up
 * a large chunk to get things back to normal. This avoids contention on the
 * delayed code allocating page by page.
 */
#define VM_DELAY_PAGE_CHUNK ((1024 * 1024 * 1024) / PAGE_SIZE)
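/*
 * Illustrative note (added for exposition, not in the original source):
 * with 4 KB pages VM_DELAY_PAGE_CHUNK works out to 262144 pages, and with
 * 16 KB pages to 65536 pages -- i.e. roughly 1 GB worth of pages is pulled
 * out of the delayed pool at a time once the free list runs low.
 */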
/*
 * Get and initialize the next delayed page.
 */
static vm_page_t
vm_get_delayed_page(int grab_options)
{
	vm_page_t	p;
	ppnum_t		pnum;

	/*
	 * Get a new page if we have one.
	 */
	lck_mtx_lock(&vm_page_queue_free_lock);
	if (vm_delayed_count == 0) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return NULL;
	}
	if (!pmap_next_page(&pnum)) {
		vm_delayed_count = 0;
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return NULL;
	}

	assert(vm_delayed_count > 0);
	--vm_delayed_count;

#if defined(__x86_64__)
	/* x86 cluster code requires increasing phys_page in vm_pages[] */
	if (vm_pages_count > 0) {
		assert(pnum > vm_pages[vm_pages_count - 1].vmp_phys_page);
	}
#endif
	p = &vm_pages[vm_pages_count];
	assert(p < vm_page_array_ending_addr);
	vm_page_init(p, pnum, FALSE);
	vm_pages_count++;
	vm_page_pages++;
	lck_mtx_unlock(&vm_page_queue_free_lock);

	/*
	 * These pages were initially counted as wired, undo that now.
	 */
	if (grab_options & VM_PAGE_GRAB_Q_LOCK_HELD) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
		vm_page_lockspin_queues();
	}
	--vm_page_wire_count;
	--vm_page_wire_count_initial;
	if (vm_page_wire_count_on_boot != 0) {
		--vm_page_wire_count_on_boot;
	}
	if (!(grab_options & VM_PAGE_GRAB_Q_LOCK_HELD)) {
		vm_page_unlock_queues();
	}

	if (fill) {
		fillPage(pnum, fillval);
	}
	return p;
}
static void vm_page_module_init_delayed(void);

/*
 * Free all remaining delayed pages to the free lists.
 */
static void
vm_free_delayed_pages(void)
{
	vm_page_t	p;
	vm_page_t	list = NULL;
	uint_t		cnt = 0;
	vm_offset_t	start_free_va;
	vm_size_t	free_size;

	while ((p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE)) != NULL) {
		if (vm_himemory_mode) {
			vm_page_release(p, FALSE);
		} else {
			/* chain the page so it can be released in reverse order below */
			p->vmp_snext = list;
			list = p;
		}
		++cnt;
	}

	/*
	 * Free the pages in reverse order if not himemory mode.
	 * Hence the low memory pages will be first on free lists. (LIFO)
	 */
	while (list != NULL) {
		p = list;
		list = p->vmp_snext;
		p->vmp_snext = NULL;
		vm_page_release(p, FALSE);
	}
#if DEVELOPMENT || DEBUG
	kprintf("vm_free_delayed_pages: initialized %d free pages\n", cnt);
#endif

	/*
	 * Free up any unused full pages at the end of the vm_pages[] array
	 */
	start_free_va = round_page((vm_offset_t)&vm_pages[vm_pages_count]);

#if defined(__x86_64__)
	/*
	 * Since x86 might have used large pages for vm_pages[], we can't
	 * free starting in the middle of a partially used large page.
	 */
	if (pmap_query_pagesize(kernel_pmap, start_free_va) == I386_LPGBYTES) {
		start_free_va = ((start_free_va + I386_LPGMASK) & ~I386_LPGMASK);
	}
#endif
	if (start_free_va < (vm_offset_t)vm_page_array_ending_addr) {
		free_size = trunc_page((vm_offset_t)vm_page_array_ending_addr - start_free_va);
		if (free_size > 0) {
			ml_static_mfree(start_free_va, (vm_offset_t)free_size);
			vm_page_array_ending_addr = (void *)start_free_va;

			/*
			 * Note there's no locking here, as only this thread will ever change this value.
			 * The reader, vm_page_diagnose, doesn't grab any locks for the counts it looks at.
			 */
			vm_page_stolen_count -= (free_size >> PAGE_SHIFT);

#if DEVELOPMENT || DEBUG
			kprintf("Freeing final unused %ld bytes from vm_pages[] at 0x%lx\n",
			    (long)free_size, (long)start_free_va);
#endif
		}
	}

	/*
	 * now we can create the VM page array zone
	 */
	vm_page_module_init_delayed();
}
/*
 * Try and free up enough delayed pages to match a contig memory allocation.
 */
static void
vm_free_delayed_pages_contig(
	uint_t		npages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask)
{
	vm_page_t	p;
	ppnum_t		pnum;
	uint_t		cnt;

	/*
	 * Treat 0 as the absolute max page number.
	 */
	if (max_pnum == 0) {
		max_pnum = PPNUM_MAX;
	}

	/*
	 * Free till we get a properly aligned start page
	 */
	for (;;) {
		p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
		if (p == NULL) {
			return;
		}
		pnum = VM_PAGE_GET_PHYS_PAGE(p);
		vm_page_release(p, FALSE);
		if (pnum >= max_pnum) {
			return;
		}
		if ((pnum & pnum_mask) == 0) {
			break;
		}
	}

	/*
	 * Having a healthy pool of free pages will help performance. We don't
	 * want to fall back to the delayed code for every page allocation.
	 */
	if (vm_page_free_count < VM_DELAY_PAGE_CHUNK) {
		npages += VM_DELAY_PAGE_CHUNK;
	}

	/*
	 * Now free up the pages
	 */
	for (cnt = 1; cnt < npages; ++cnt) {
		p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
		if (p == NULL) {
			return;
		}
		vm_page_release(p, FALSE);
	}
}
#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
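/*
 * Illustrative check (added for exposition, not in the original source):
 * ROUNDUP_NEXTP2() rounds its argument up to the next power of two and is
 * an identity on values that already are powers of two (for 1 < X <= 2^31).
 */
_Static_assert(ROUNDUP_NEXTP2(5) == 8, "5 rounds up to 8");
_Static_assert(ROUNDUP_NEXTP2(8) == 8, "8 is already a power of two");
_Static_assert(ROUNDUP_NEXTP2(17) == 32, "17 rounds up to 32");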
void
vm_page_init_local_q(unsigned int num_cpus)
{
	struct vpl *t_local_q;

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		ml_cpu_info_t cpu_info;

		/*
		 * Force the allocation alignment to a cacheline,
		 * because the `vpl` struct has a lock and will be taken
		 * cross CPU so we want to isolate the rest of the per-CPU
		 * data to avoid false sharing due to this lock being taken.
		 */

		ml_cpu_get_info(&cpu_info);

		t_local_q = zalloc_percpu_permanent(sizeof(struct vpl),
		    cpu_info.cache_line_size - 1);

		zpercpu_foreach(lq, t_local_q) {
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			vm_page_queue_init(&lq->vpl_queue);
		}

		/* make the initialization visible to all cores */
		os_atomic_store(&vm_page_local_q, t_local_q, release);
	}
}
/*
 * vm_init_before_launchd
 *
 * This should be called right before launchd is loaded.
 */
void
vm_init_before_launchd()
{
	vm_page_lockspin_queues();
	vm_page_wire_count_on_boot = vm_page_wire_count;
	vm_page_unlock_queues();
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */
void
vm_page_bootstrap(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	unsigned int	i;
	unsigned int	log1;
	unsigned int	log2;
	unsigned int	size;

	/*
	 *	Initialize the page queues.
	 */

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
			queue_init(&purgeable_queues[i].objq[group]);
		}

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
#endif
	}
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++) {
		vm_page_queue_init(&vm_page_queue_free[i].qhead);
	}

	vm_page_queue_init(&vm_lopage_queue_free);
	vm_page_queue_init(&vm_page_queue_active);
	vm_page_queue_init(&vm_page_queue_inactive);
#if CONFIG_SECLUDED_MEMORY
	vm_page_queue_init(&vm_page_queue_secluded);
#endif /* CONFIG_SECLUDED_MEMORY */
	vm_page_queue_init(&vm_page_queue_cleaned);
	vm_page_queue_init(&vm_page_queue_throttled);
	vm_page_queue_init(&vm_page_queue_anonymous);
	queue_init(&vm_objects_wired);

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		vm_page_queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
#if CONFIG_BACKGROUND_QUEUE
	vm_page_queue_init(&vm_page_queue_background);

	vm_page_background_count = 0;
	vm_page_background_internal_count = 0;
	vm_page_background_external_count = 0;
	vm_page_background_promoted_count = 0;

	vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);

	if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX) {
		vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
	}

	vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
	vm_page_background_exclude_external = 0;

	PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
	PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
	PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));

	if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1) {
		vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
	}
#endif /* CONFIG_BACKGROUND_QUEUE */
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;
#if CONFIG_SECLUDED_MEMORY
	vm_page_free_wanted_secluded = 0;
#endif /* CONFIG_SECLUDED_MEMORY */

#if defined (__x86_64__)
	/* this must be called before vm_page_set_colors() */
	vm_page_setup_clump();
#endif

	vm_page_set_colors();
	bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
	vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
	vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
	vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;

	bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
	vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
	vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
	vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
	vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
	vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
	vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
	vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */

	bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
	vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */

	bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
	vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
	vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
	vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
	vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
	vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */

	for (vm_tag_t t = 0; t < VM_KERN_MEMORY_FIRST_DYNAMIC; t++) {
		vm_allocation_sites_static[t].refcount = 2;
		vm_allocation_sites_static[t].tag = t;
		vm_allocation_sites[t] = &vm_allocation_sites_static[t];
	}
	vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
	vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
	vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];

	/*
	 * Steal memory for the map and zone subsystems.
	 */
	kernel_startup_initialize_upto(STARTUP_SUB_PMAP_STEAL);

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages) {
			vm_page_bucket_count <<= 1;
		}
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *	        hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++) {
		size /= 2;
	}
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++) {
		size /= 2;
	}
	vm_page_hash_shift = log1 / 2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to insure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count) {
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
	}
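	/*
	 * Illustrative sketch (added for exposition, not in the original source):
	 * on a system where vm_page_bucket_count settles at 2^20 buckets, log1 == 20,
	 * so vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421 and
	 * vm_page_hash_mask == 0xfffff.  vm_page_hash() below multiplies the object
	 * pointer by that constant and xors in the page index of the offset before
	 * masking, which is what the "unique series" requirement above is about.
	 */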
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
	    pmap_steal_memory(vm_page_bucket_count *
	    sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
	    vm_map_round_page((vm_page_fake_buckets_start +
	    (vm_page_bucket_count *
	    sizeof(vm_page_bucket_t))),
	    PAGE_MASK);
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	    cp < (char *)vm_page_fake_buckets_end;
	    cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

	kernel_debug_string_early("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
	    pmap_steal_memory(vm_page_bucket_count *
	    sizeof(vm_page_bucket_t));

	kernel_debug_string_early("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
	    pmap_steal_memory(vm_page_bucket_lock_count *
	    sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++) {
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
	}

	vm_tag_init();

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */

	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string_early("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) -
	    vm_page_free_count - vm_lopage_free_count;
#if CONFIG_SECLUDED_MEMORY
	vm_page_wire_count -= vm_page_secluded_count;
#endif /* CONFIG_SECLUDED_MEMORY */
	vm_page_wire_count_initial = vm_page_wire_count;

	/* capture this for later use */
	booter_size = ml_get_booter_memory_size();

	printf("vm_page_bootstrap: %d free pages, %d wired pages, (up to %d of which are delayed free)\n",
	    vm_page_free_count, vm_page_wire_count, vm_delayed_count);

	kernel_debug_string_early("vm_page_bootstrap complete");
}
#ifndef MACHINE_PAGES
/*
 * This is the early boot time allocator for data structures needed to bootstrap the VM system.
 * On x86 it will allocate large pages if size is sufficiently large. We don't need to do this
 * on ARM yet, due to the combination of a large base page size and smaller RAM devices.
 */
static void *
pmap_steal_memory_internal(
	vm_size_t size,
	boolean_t might_free)
{
	kern_return_t	kr;
	vm_offset_t	addr;
	vm_offset_t	map_addr;
	ppnum_t		phys_page;

	/*
	 * Size needs to be aligned to word size.
	 */
	size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);

	/*
	 * On the first call, get the initial values for virtual address space
	 * and page align them.
	 */
	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

#if defined(__x86_64__)
		/*
		 * Release remaining unused section of preallocated KVA and the 4K page tables
		 * that map it. This makes the VA available for large page mappings.
		 */
		Idle_PTs_release(virtual_space_start, virtual_space_end);
#endif
	}

	/*
	 * Allocate the virtual space for this request. On x86, we'll align to a large page
	 * address if the size is big enough to back with at least 1 large page.
	 */
#if defined(__x86_64__)
	if (size >= I386_LPGBYTES) {
		virtual_space_start = ((virtual_space_start + I386_LPGMASK) & ~I386_LPGMASK);
	}
#endif
	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 * Allocate and map physical pages to back the new virtual space.
	 */
	map_addr = round_page(addr);
	while (map_addr < addr + size) {
#if defined(__x86_64__)
		/*
		 * Back with a large page if properly aligned on x86
		 */
		if ((map_addr & I386_LPGMASK) == 0 &&
		    map_addr + I386_LPGBYTES <= addr + size &&
		    pmap_pre_expand_large(kernel_pmap, map_addr) == KERN_SUCCESS &&
		    pmap_next_page_large(&phys_page) == KERN_SUCCESS) {
			kr = pmap_enter(kernel_pmap, map_addr, phys_page,
			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
			    VM_WIMG_USE_DEFAULT | VM_MEM_SUPERPAGE, FALSE);

			if (kr != KERN_SUCCESS) {
				panic("pmap_steal_memory: pmap_enter() large failed, new_addr=%#lx, phys_page=%u",
				    (unsigned long)map_addr, phys_page);
			}
			map_addr += I386_LPGBYTES;
			vm_page_wire_count += I386_LPGBYTES >> PAGE_SHIFT;
			vm_page_stolen_count += I386_LPGBYTES >> PAGE_SHIFT;
			vm_page_kern_lpage_count++;
			continue;
		}
#endif

		if (!pmap_next_page_hi(&phys_page, might_free)) {
			panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
		}

#if defined(__x86_64__)
		pmap_pre_expand(kernel_pmap, map_addr);
#endif

		kr = pmap_enter(kernel_pmap, map_addr, phys_page,
		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
		    VM_WIMG_USE_DEFAULT, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("pmap_steal_memory() pmap_enter failed, map_addr=%#lx, phys_page=%u",
			    (unsigned long)map_addr, phys_page);
		}
		map_addr += PAGE_SIZE;

		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
		vm_page_stolen_count++;
	}

#if defined(__x86_64__)
	/*
	 * The call with might_free is currently the last use of pmap_steal_memory*().
	 * Notify the pmap layer to record which high pages were allocated so far.
	 */
	if (might_free) {
		pmap_hi_pages_done();
	}
#endif
#if KASAN
	kasan_notify_address(round_page(addr), size);
#endif
	return (void *) addr;
}

void *
pmap_steal_memory(
	vm_size_t size)
{
	return pmap_steal_memory_internal(size, FALSE);
}

void *
pmap_steal_freeable_memory(
	vm_size_t size)
{
	return pmap_steal_memory_internal(size, TRUE);
}
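/*
 * Illustrative note (added for exposition, not in the original source):
 * when the large-page path above is taken on x86_64, a single 2 MB mapping
 * backs 512 4 KB pages, which is why vm_page_wire_count and
 * vm_page_stolen_count are bumped by I386_LPGBYTES >> PAGE_SHIFT (512)
 * rather than by 1 as in the small-page path.
 */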
#if defined(__arm64__)
/*
 * Retire a page at startup.
 * These pages will eventually wind up on the retired_pages_object
 * in vm_retire_boot_pages().
 */
static vm_page_queue_head_t vm_page_queue_retired VM_PAGE_PACKED_ALIGNED;
static void
vm_page_retire_startup(vm_page_t p)
{
	p->vmp_q_state = VM_PAGE_NOT_ON_Q;
	p->vmp_error = true;
	p->vmp_unusual = true;
	vm_page_queue_enter(&vm_page_queue_retired, p, vmp_pageq);
	printf("To be retired at boot: page at 0x%llx\n", (long long)ptoa(VM_PAGE_GET_PHYS_PAGE(p)));
}
#endif /* defined(__arm64__) */
#if CONFIG_SECLUDED_MEMORY
/* boot-args to control secluded memory */
unsigned int secluded_mem_mb = 0;	/* # of MBs of RAM to seclude */
int secluded_for_iokit = 1;		/* IOKit can use secluded memory */
int secluded_for_apps = 1;		/* apps can use secluded memory */
int secluded_for_filecache = 2;		/* filecache can use secluded memory */

int secluded_for_fbdp = 0;

uint64_t secluded_shutoff_trigger = 0;
uint64_t secluded_shutoff_headroom = 150 * 1024 * 1024; /* original value from N56 */
#endif /* CONFIG_SECLUDED_MEMORY */
#if defined(__arm__) || defined(__arm64__)
extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
unsigned int vm_first_phys_ppnum = 0;
#endif

void vm_page_release_startup(vm_page_t mem);
void
pmap_startup(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	unsigned int	i, npages;
	ppnum_t		phys_page;
	uint64_t	mem_sz;
	uint64_t	start_ns;
	uint64_t	now_ns;
	uint_t		low_page_count = 0;

#if defined(__LP64__)
	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end) {
		virtual_space_start = round_page(virtual_space_start);
	}
#endif

	/*
	 * We calculate how many page frames we will have
	 * and then allocate the page structures in one chunk.
	 *
	 * Note that the calculation here doesn't take into account
	 * the memory needed to map what's being allocated, i.e. the page
	 * table entries. So the actual number of pages we get will be
	 * less than this. To do someday: include that in the computation.
	 *
	 * Also for ARM, we don't use the count of free_pages, but rather the
	 * range from last page to first page (ignore holes due to retired pages).
	 */
#if defined(__arm__) || defined(__arm64__)
	mem_sz = pmap_free_pages_span() * (uint64_t)PAGE_SIZE;
#else /* defined(__arm__) || defined(__arm64__) */
	mem_sz = pmap_free_pages() * (uint64_t)PAGE_SIZE;
#endif /* defined(__arm__) || defined(__arm64__) */
	mem_sz += round_page(virtual_space_start) - virtual_space_start;	/* Account for any slop */
	npages = (uint_t)(mem_sz / (PAGE_SIZE + sizeof(*vm_pages)));	/* scaled to include the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_freeable_memory(npages * sizeof *vm_pages);

	/*
	 * Check if we want to initialize pages to a known value
	 */
	if (PE_parse_boot_argn("fill", &fillval, sizeof(fillval))) {
		fill = TRUE;
	}
#if DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (!fill) {
		fill = TRUE;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill) {
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
	}

#if CONFIG_SECLUDED_MEMORY
	/*
	 * Figure out how much secluded memory to have before we start
	 * releasing pages to free lists.
	 * The default, if specified nowhere else, is no secluded mem.
	 */
	secluded_mem_mb = 0;
	if (max_mem > 1 * 1024 * 1024 * 1024) {
		/* default to 90MB for devices with > 1GB of RAM */
		secluded_mem_mb = 90;
	}
	/* override with value from device tree, if provided */
	PE_get_default("kern.secluded_mem_mb",
	    &secluded_mem_mb, sizeof(secluded_mem_mb));
	/* override with value from boot-args, if provided */
	PE_parse_boot_argn("secluded_mem_mb",
	    &secluded_mem_mb,
	    sizeof(secluded_mem_mb));

	vm_page_secluded_target = (unsigned int)
	    ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
	PE_parse_boot_argn("secluded_for_iokit",
	    &secluded_for_iokit,
	    sizeof(secluded_for_iokit));
	PE_parse_boot_argn("secluded_for_apps",
	    &secluded_for_apps,
	    sizeof(secluded_for_apps));
	PE_parse_boot_argn("secluded_for_filecache",
	    &secluded_for_filecache,
	    sizeof(secluded_for_filecache));
	PE_parse_boot_argn("secluded_for_fbdp",
	    &secluded_for_fbdp,
	    sizeof(secluded_for_fbdp));

	/*
	 * Allow a really large app to effectively use secluded memory until it exits.
	 */
	if (vm_page_secluded_target != 0) {
		/*
		 * Get an amount from boot-args, else use 1/2 of max_mem.
		 * 1/2 max_mem was chosen from a Peace daemon tentpole test which
		 * used munch to induce jetsam thrashing of false idle daemons on N56.
		 */
		int secluded_shutoff_mb;
		if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
		    sizeof(secluded_shutoff_mb))) {
			secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
		} else {
			secluded_shutoff_trigger = max_mem / 2;
		}

		/* ensure the headroom value is sensible and avoid underflows */
		assert(secluded_shutoff_trigger == 0 || secluded_shutoff_trigger > secluded_shutoff_headroom);
	}

#endif /* CONFIG_SECLUDED_MEMORY */

#if defined(__x86_64__)

	/*
	 * Decide how much memory we delay freeing at boot time.
	 */
	uint32_t delay_above_gb;
	if (!PE_parse_boot_argn("delay_above_gb", &delay_above_gb, sizeof(delay_above_gb))) {
		delay_above_gb = DEFAULT_DELAY_ABOVE_PHYS_GB;
	}

	if (delay_above_gb == 0) {
		delay_above_pnum = PPNUM_MAX;
	} else {
		delay_above_pnum = delay_above_gb * (1024 * 1024 * 1024 / PAGE_SIZE);
	}

	/* make sure we have sane breathing room: 1G above low memory */
	if (delay_above_pnum <= max_valid_low_ppnum) {
		delay_above_pnum = max_valid_low_ppnum + ((1024 * 1024 * 1024) >> PAGE_SHIFT);
	}

	if (delay_above_pnum < PPNUM_MAX) {
		printf("pmap_startup() delaying init/free of page nums > 0x%x\n", delay_above_pnum);
	}

#endif /* defined(__x86_64__) */
	/*
	 * Initialize and release the page frames.
	 */
	kernel_debug_string_early("page_frame_init");

	vm_page_array_beginning_addr = &vm_pages[0];
	vm_page_array_ending_addr = &vm_pages[npages];	/* used by ptr packing/unpacking code */
#if VM_PAGE_PACKED_FROM_ARRAY
	if (npages >= VM_PAGE_PACKED_FROM_ARRAY) {
		panic("pmap_startup(): too many pages to support vm_page packing");
	}
#endif

	vm_delayed_count = 0;
#if defined(__arm64__)
	vm_page_queue_init(&vm_page_queue_retired);
#endif /* defined(__arm64__) */

	absolutetime_to_nanoseconds(mach_absolute_time(), &start_ns);

	for (i = 0; i < npages; i++) {
		/* Did we run out of pages? */
		if (!pmap_next_page(&phys_page)) {
			break;
		}

		if (phys_page < max_valid_low_ppnum) {
			++low_page_count;
		}

		/* Are we at high enough pages to delay the rest? */
		if (low_page_count > vm_lopage_free_limit && phys_page > delay_above_pnum) {
			vm_delayed_count = pmap_free_pages();
			break;
		}

#if defined(__arm__) || defined(__arm64__)
		if (i == 0) {
			vm_first_phys_ppnum = phys_page;
			patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr,
			    (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
#if defined(__arm64__)
		} else {
			/*
			 * pmap_next_page() may skip over pages reported bad by iboot.
			 */
			while (i < phys_page - vm_first_phys_ppnum && i < npages) {
				++vm_pages_count;
				vm_page_init(&vm_pages[i], i + vm_first_phys_ppnum, FALSE);
				vm_page_retire_startup(&vm_pages[i]);
				++i;
			}
			if (i >= npages) {
				break;
			}
			assert(i == phys_page - vm_first_phys_ppnum);
#endif /* defined(__arm64__) */
		}
#endif /* defined(__arm__) || defined(__arm64__) */

#if defined(__x86_64__)
		/* The x86 clump freeing code requires increasing ppn's to work correctly */
		if (i > 0) {
			assert(phys_page > vm_pages[i - 1].vmp_phys_page);
		}
#endif
		++vm_pages_count;
		vm_page_init(&vm_pages[i], phys_page, FALSE);
		if (fill) {
			fillPage(phys_page, fillval);
		}
		if (vm_himemory_mode) {
			vm_page_release_startup(&vm_pages[i]);
		}
	}
	vm_page_pages = vm_pages_count;	/* used to report to user space */

	if (!vm_himemory_mode) {
		do {
			if (!vm_pages[--i].vmp_error) {	/* skip retired pages */
				vm_page_release_startup(&vm_pages[i]);
			}
		} while (i != 0);
	}

	absolutetime_to_nanoseconds(mach_absolute_time(), &now_ns);
	printf("pmap_startup() init/release time: %lld microsec\n", (now_ns - start_ns) / NSEC_PER_USEC);
	printf("pmap_startup() delayed init/release of %d pages\n", vm_delayed_count);

#if defined(__LP64__)
	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0]) {
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
	}
	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count - 1]))) != &vm_pages[vm_pages_count - 1]) {
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count - 1]);
	}
#endif

	VM_CHECK_MEMORYSTATUS;

	/*
	 * We have to re-align virtual_space_start,
	 * because pmap_steal_memory has been using it.
	 */
	virtual_space_start = round_page(virtual_space_start);
	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 * Create the zone that represents the vm_pages[] array. Nothing ever allocates
 * or frees to this zone. It's just here for reporting purposes via zprint command.
 * This needs to be done after all initially delayed pages are put on the free lists.
 */
static void
vm_page_module_init_delayed(void)
{
	(void)zone_create_ext("vm pages array", sizeof(struct vm_page),
	    ZC_NOGZALLOC, ZONE_ID_ANY, ^(zone_t z) {
		uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;

		zone_set_exhaustible(z, 0);
		/*
		 * Reflect size and usage information for vm_pages[].
		 */

		z->z_elems_avail = (uint32_t)(vm_page_array_ending_addr - vm_pages);
		z->z_elems_free = z->z_elems_avail - vm_pages_count;
		zpercpu_get_cpu(z->z_stats, 0)->zs_mem_allocated =
		vm_pages_count * sizeof(struct vm_page);
		vm_page_array_zone_data_size = (uintptr_t)((void *)vm_page_array_ending_addr - (void *)vm_pages);
		vm_page_zone_pages = atop(round_page((vm_offset_t)vm_page_array_zone_data_size));
		z->z_wired_cur += vm_page_zone_pages;
		z->z_wired_hwm = z->z_wired_cur;
		z->z_va_cur = z->z_wired_cur;
		/* since zone accounts for these, take them out of stolen */
		VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
	});
}

/*
 * Create the vm_pages zone. This is used for the vm_page structures for the pages
 * that are scavenged from other boot time usages by ml_static_mfree(). As such,
 * this needs to happen in early VM bootstrap.
 */
static void
vm_page_module_init(void)
{
	vm_size_t vm_page_with_ppnum_size;

	/*
	 * Since the pointers to elements in this zone will be packed, they
	 * must have appropriate size. Not strictly what sizeof() reports.
	 */
	vm_page_with_ppnum_size =
	    (sizeof(struct vm_page_with_ppnum) + (VM_PAGE_PACKED_PTR_ALIGNMENT - 1)) &
	    ~(VM_PAGE_PACKED_PTR_ALIGNMENT - 1);

	vm_page_zone = zone_create_ext("vm pages", vm_page_with_ppnum_size,
	    ZC_NOGZALLOC | ZC_ALIGNMENT_REQUIRED, ZONE_ID_ANY, ^(zone_t z) {
#if defined(__LP64__)
		zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
#endif
		/*
		 * The number "10" is a small number that is larger than the number
		 * of fictitious pages that any single caller will attempt to allocate
		 * without blocking.
		 *
		 * The largest such number at the moment is kernel_memory_allocate()
		 * when 2 guard pages are asked. 10 is simply a somewhat larger number,
		 * taking into account the 50% hysteresis the zone allocator uses.
		 *
		 * Note: this works at all because the zone allocator
		 *       doesn't ever allocate fictitious pages.
		 */
		z->z_elems_rsv = 10;
	});
}
STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_page_module_init);
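/*
 * Illustrative note (added for exposition, not in the original source):
 * the rounding above aligns the element size to VM_PAGE_PACKED_PTR_ALIGNMENT.
 * For example, if sizeof(struct vm_page_with_ppnum) were 74 bytes and the
 * required alignment 64, the zone element size would become 128 bytes, so
 * every element address can be packed/unpacked without losing low-order bits.
 */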
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	    phys_page < end;
	    phys_page++) {
		m = vm_page_grab_fictitious_common(phys_page, TRUE);
		m->vmp_fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		lck_mtx_lock(&vm_page_queue_free_lock);
		vm_page_pages++;
		lck_mtx_unlock(&vm_page_queue_free_lock);
		vm_page_release(m, FALSE);
	}
}

#if defined(__arm64__)
/*
 * Like vm_page_create(), except we want to immediately retire the page,
 * not put it on the free list.
 */
void
vm_page_create_retired(
	ppnum_t phys_page)
{
	vm_page_t	m;

	m = vm_page_grab_fictitious_common(phys_page, TRUE);
	m->vmp_fictitious = FALSE;
	pmap_clear_noencrypt(phys_page);
	m->vmp_error = true;
	m->vmp_unusual = true;
	vm_page_lock_queues();
	m->vmp_q_state = VM_PAGE_IS_WIRED;
	m->vmp_wire_count++;
	vm_page_unlock_queues();

	lck_mtx_lock(&vm_page_queue_free_lock);
	vm_page_pages++;
	lck_mtx_unlock(&vm_page_queue_free_lock);

	vm_object_lock(retired_pages_object);
	vm_page_insert_wired(m, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(m)), VM_KERN_MEMORY_RETIRED);
	vm_object_unlock(retired_pages_object);
	pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(m));
}
#endif /* defined(__arm64__) */
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	& vm_page_hash_mask)
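/*
 * Illustrative sketch (added for exposition, not in the original source):
 * for a hypothetical object pointer O and an offset of 5 pages, the bucket is
 *
 *   ((natural_t)((uintptr_t)O * vm_page_bucket_hash) +
 *       (5 ^ vm_page_bucket_hash)) & vm_page_hash_mask
 *
 * i.e. the object pointer is scattered by the multiplicative constant chosen
 * in vm_page_bootstrap() and the page index of the offset is xor-mixed in,
 * then the result is masked down to the power-of-two bucket count.
 */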
1783 * vm_page_insert: [ internal use only ]
1785 * Inserts the given mem entry into the object/object-page
1786 * table and object list.
1788 * The object must be locked.
1794 vm_object_offset_t offset
)
1796 vm_page_insert_internal(mem
, object
, offset
, VM_KERN_MEMORY_NONE
, FALSE
, TRUE
, FALSE
, FALSE
, NULL
);
1800 vm_page_insert_wired(
1803 vm_object_offset_t offset
,
1806 vm_page_insert_internal(mem
, object
, offset
, tag
, FALSE
, TRUE
, FALSE
, FALSE
, NULL
);
1810 vm_page_insert_internal(
1813 vm_object_offset_t offset
,
1815 boolean_t queues_lock_held
,
1816 boolean_t insert_in_hash
,
1817 boolean_t batch_pmap_op
,
1818 boolean_t batch_accounting
,
1819 uint64_t *delayed_ledger_update
)
1821 vm_page_bucket_t
*bucket
;
1822 lck_spin_t
*bucket_lock
;
1825 int ledger_idx_volatile
;
1826 int ledger_idx_nonvolatile
;
1827 int ledger_idx_volatile_compressed
;
1828 int ledger_idx_nonvolatile_compressed
;
1829 boolean_t do_footprint
;
1833 * we may not hold the page queue lock
1834 * so this check isn't safe to make
1839 assertf(page_aligned(offset
), "0x%llx\n", offset
);
1841 assert(!VM_PAGE_WIRED(mem
) || mem
->vmp_private
|| mem
->vmp_fictitious
|| (tag
!= VM_KERN_MEMORY_NONE
));
1843 /* the vm_submap_object is only a placeholder for submaps */
1844 assert(object
!= vm_submap_object
);
1846 vm_object_lock_assert_exclusive(object
);
1847 LCK_MTX_ASSERT(&vm_page_queue_lock
,
1848 queues_lock_held
? LCK_MTX_ASSERT_OWNED
1849 : LCK_MTX_ASSERT_NOTOWNED
);
1851 if (queues_lock_held
== FALSE
) {
1852 assert(!VM_PAGE_PAGEABLE(mem
));
1855 if (insert_in_hash
== TRUE
) {
1856 #if DEBUG || VM_PAGE_BUCKETS_CHECK
1857 if (mem
->vmp_tabled
|| mem
->vmp_object
) {
1858 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1859 "already in (obj=%p,off=0x%llx)",
1860 mem
, object
, offset
, VM_PAGE_OBJECT(mem
), mem
->vmp_offset
);
1863 if (object
->internal
&& (offset
>= object
->vo_size
)) {
1864 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1865 mem
, object
, offset
, object
->vo_size
);
1868 assert(vm_page_lookup(object
, offset
) == VM_PAGE_NULL
);
1871 * Record the object/offset pair in this page
1874 mem
->vmp_object
= VM_PAGE_PACK_OBJECT(object
);
1875 mem
->vmp_offset
= offset
;
1877 #if CONFIG_SECLUDED_MEMORY
1878 if (object
->eligible_for_secluded
) {
1879 vm_page_secluded
.eligible_for_secluded
++;
1881 #endif /* CONFIG_SECLUDED_MEMORY */
1884 * Insert it into the object_object/offset hash table
1886 hash_id
= vm_page_hash(object
, offset
);
1887 bucket
= &vm_page_buckets
[hash_id
];
1888 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1890 lck_spin_lock_grp(bucket_lock
, &vm_page_lck_grp_bucket
);
1892 mem
->vmp_next_m
= bucket
->page_list
;
1893 bucket
->page_list
= VM_PAGE_PACK_PTR(mem
);
1894 assert(mem
== (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
)));
1896 #if MACH_PAGE_HASH_STATS
1897 if (++bucket
->cur_count
> bucket
->hi_count
) {
1898 bucket
->hi_count
= bucket
->cur_count
;
1900 #endif /* MACH_PAGE_HASH_STATS */
1901 mem
->vmp_hashed
= TRUE
;
1902 lck_spin_unlock(bucket_lock
);
1906         unsigned int cache_attr;
1908         cache_attr = object->wimg_bits & VM_WIMG_MASK;
1910         if (cache_attr != VM_WIMG_USE_DEFAULT) {
1911             PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1915      * Now link into the object's list of backed pages.
1917     vm_page_queue_enter(&object->memq, mem, vmp_listq);
1918     object->memq_hint = mem;
1919     mem->vmp_tabled = TRUE;
1922      * Show that the object has one more resident page.
1925     object->resident_page_count++;
1926     if (VM_PAGE_WIRED(mem)) {
1927         assert(mem->vmp_wire_count > 0);
1928         VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1929         VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1930         VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1932     assert(object->resident_page_count >= object->wired_page_count);
1934 #if DEVELOPMENT || DEBUG
1935     if (object->object_is_shared_cache &&
1936         object->pager != NULL &&
1937         object->pager->mo_pager_ops == &shared_region_pager_ops) {
1939         assert(!object->internal);
1940         new = OSAddAtomic(+1, &shared_region_pagers_resident_count);
1942             old = shared_region_pagers_resident_peak;
1943         } while (old < new &&
1944             !OSCompareAndSwap(old, new, &shared_region_pagers_resident_peak));
1946 #endif /* DEVELOPMENT || DEBUG */
1948     if (batch_accounting == FALSE) {
1949         if (object->internal) {
1950             OSAddAtomic(1, &vm_page_internal_count);
1952             OSAddAtomic(1, &vm_page_external_count);
1957      * It wouldn't make sense to insert a "reusable" page in
1958      * an object (the page would have been marked "reusable" only
1959      * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1960      * in the object at that time).
1961      * But a page could be inserted in a "all_reusable" object, if
1962      * something faults it in (a vm_read() from another task or a
1963      * "use-after-free" issue in user space, for example). It can
1964      * also happen if we're relocating a page from that object to
1965      * a different physical page during a physically-contiguous
1968     assert(!mem->vmp_reusable);
1969     if (object->all_reusable) {
1970         OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1973     if (object->purgable == VM_PURGABLE_DENY &&
1974         !object->vo_ledger_tag) {
1977         owner = VM_OBJECT_OWNER(object);
1978         vm_object_ledger_tag_ledgers(object,
1979             &ledger_idx_volatile,
1980             &ledger_idx_nonvolatile,
1981             &ledger_idx_volatile_compressed,
1982             &ledger_idx_nonvolatile_compressed,
1986         (object->purgable == VM_PURGABLE_NONVOLATILE ||
1987         object->purgable == VM_PURGABLE_DENY ||
1988         VM_PAGE_WIRED(mem))) {
1989         if (delayed_ledger_update) {
1990             *delayed_ledger_update += PAGE_SIZE;
1992             /* more non-volatile bytes */
1993             ledger_credit(owner->ledger,
1994                 ledger_idx_nonvolatile,
1997                 /* more footprint */
1998                 ledger_credit(owner->ledger,
1999                     task_ledgers.phys_footprint,
2004         (object->purgable == VM_PURGABLE_VOLATILE ||
2005         object->purgable == VM_PURGABLE_EMPTY)) {
2006         assert(!VM_PAGE_WIRED(mem));
2007         /* more volatile bytes */
2008         ledger_credit(owner->ledger,
2009             ledger_idx_volatile,
2013     if (object->purgable == VM_PURGABLE_VOLATILE) {
2014         if (VM_PAGE_WIRED(mem)) {
2015             OSAddAtomic(+1, &vm_page_purgeable_wired_count);
2017             OSAddAtomic(+1, &vm_page_purgeable_count);
2019     } else if (object->purgable == VM_PURGABLE_EMPTY &&
2020         mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
2022          * This page belongs to a purged VM object but hasn't
2023          * been purged (because it was "busy").
2024          * It's in the "throttled" queue and hence not
2025          * visible to vm_pageout_scan(). Move it to a pageable
2026          * queue, so that it can eventually be reclaimed, instead
2027          * of lingering in the "empty" object.
2029         if (queues_lock_held == FALSE) {
2030             vm_page_lockspin_queues();
2032         vm_page_deactivate(mem);
2033         if (queues_lock_held == FALSE) {
2034             vm_page_unlock_queues();
2038 #if VM_OBJECT_TRACKING_OP_MODIFIED
2039     if (vm_object_tracking_inited &&
2041         object->resident_page_count == 0 &&
2042         object->pager == NULL &&
2043         object->shadow != NULL &&
2044         object->shadow->copy == object) {
2045         void *bt[VM_OBJECT_TRACKING_BTDEPTH];
2048         numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
2049         btlog_add_entry(vm_object_tracking_btlog,
2051             VM_OBJECT_TRACKING_OP_MODIFIED,
2055 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
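/*
 * When a caller passes a non-NULL delayed_ledger_update, the per-page
 * ledger credits above are skipped and the byte count is accumulated
 * instead, so a batch insert can post a single ledger operation at the end.
 * A hedged sketch of that caller-side pattern (illustrative only; the
 * local variable names, loop, and ledger index are hypothetical):
 *
 *	uint64_t delayed_ledger_update = 0;
 *
 *	for (each page m to insert) {
 *		vm_page_insert_internal(m, object, offset, tag,
 *		    FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
 *	}
 *	if (delayed_ledger_update != 0) {
 *		ledger_credit(owner->ledger, ledger_idx_nonvolatile,
 *		    delayed_ledger_update);
 *	}
 */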
2061  *	Exactly like vm_page_insert, except that we first
2062  *	remove any existing page at the given offset in object.
2064  *	The object must be locked.
2070     vm_object_offset_t offset)
2072     vm_page_bucket_t *bucket;
2073     vm_page_t found_m = VM_PAGE_NULL;
2074     lck_spin_t *bucket_lock;
2079      * we don't hold the page queue lock
2080      * so this check isn't safe to make
2084     vm_object_lock_assert_exclusive(object);
2085 #if DEBUG || VM_PAGE_BUCKETS_CHECK
2086     if (mem->vmp_tabled || mem->vmp_object) {
2087         panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
2088             "already in (obj=%p,off=0x%llx)",
2089             mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
2092     LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2094     assert(!VM_PAGE_PAGEABLE(mem));
2097      * Record the object/offset pair in this page
2099     mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
2100     mem->vmp_offset = offset;
2103      * Insert it into the object/offset hash table,
2104      * replacing any page that might have been there.
2107     hash_id = vm_page_hash(object, offset);
2108     bucket = &vm_page_buckets[hash_id];
2109     bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2111     lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
2113     if (bucket->page_list) {
2114         vm_page_packed_t *mp = &bucket->page_list;
2115         vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
2119              * compare packed object pointers
2121             if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
2123                  * Remove old page from hash list
2125                 *mp = m->vmp_next_m;
2126                 m->vmp_hashed = FALSE;
2127                 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2132             mp = &m->vmp_next_m;
2133         } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
2135         mem->vmp_next_m = bucket->page_list;
2137         mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2140      * insert new page at head of hash list
2142     bucket->page_list = VM_PAGE_PACK_PTR(mem);
2143     mem->vmp_hashed = TRUE;
2145     lck_spin_unlock(bucket_lock);
2149          * there was already a page at the specified
2150          * offset for this object... remove it from
2151          * the object and free it back to the free list
2153         vm_page_free_unlocked(found_m, FALSE);
2155     vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
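/*
 * In short, vm_page_replace() is vm_page_insert() with eviction: any page
 * already resident at (object, offset) is unhashed and freed before the new
 * page is inserted. A hedged usage sketch (illustrative only):
 *
 *	vm_object_lock(object);
 *	vm_page_replace(new_m, object, offset);   // old page, if any, is freed
 *	vm_object_unlock(object);
 */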
2159 * vm_page_remove: [ internal use only ]
2161 * Removes the given mem entry from the object/offset-page
2162 * table and the object page list.
2164 * The object must be locked.
2170 boolean_t remove_from_hash
)
2172 vm_page_bucket_t
*bucket
;
2174 lck_spin_t
*bucket_lock
;
2177 vm_object_t m_object
;
2178 int ledger_idx_volatile
;
2179 int ledger_idx_nonvolatile
;
2180 int ledger_idx_volatile_compressed
;
2181 int ledger_idx_nonvolatile_compressed
;
2184 m_object
= VM_PAGE_OBJECT(mem
);
2186 vm_object_lock_assert_exclusive(m_object
);
2187 assert(mem
->vmp_tabled
);
2188 assert(!mem
->vmp_cleaning
);
2189 assert(!mem
->vmp_laundry
);
2191 if (VM_PAGE_PAGEABLE(mem
)) {
2192 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2196 * we don't hold the page queue lock
2197 * so this check isn't safe to make
2201 if (remove_from_hash
== TRUE
) {
2203 * Remove from the object_object/offset hash table
2205 hash_id
= vm_page_hash(m_object
, mem
->vmp_offset
);
2206 bucket
= &vm_page_buckets
[hash_id
];
2207 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
2209 lck_spin_lock_grp(bucket_lock
, &vm_page_lck_grp_bucket
);
2211 if ((this = (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
))) == mem
) {
2212 /* optimize for common case */
2214 bucket
->page_list
= mem
->vmp_next_m
;
2216 vm_page_packed_t
*prev
;
2218 for (prev
= &this->vmp_next_m
;
2219 (this = (vm_page_t
)(VM_PAGE_UNPACK_PTR(*prev
))) != mem
;
2220 prev
= &this->vmp_next_m
) {
2223 *prev
= this->vmp_next_m
;
2225 #if MACH_PAGE_HASH_STATS
2226 bucket
->cur_count
--;
2227 #endif /* MACH_PAGE_HASH_STATS */
2228 mem
->vmp_hashed
= FALSE
;
2229 this->vmp_next_m
= VM_PAGE_PACK_PTR(NULL
);
2230 lck_spin_unlock(bucket_lock
);
2233 * Now remove from the object's list of backed pages.
2236 vm_page_remove_internal(mem
);
2239 * And show that the object has one fewer resident
2243 assert(m_object
->resident_page_count
> 0);
2244 m_object
->resident_page_count
--;
2246 #if DEVELOPMENT || DEBUG
2247 if (m_object
->object_is_shared_cache
&&
2248 m_object
->pager
!= NULL
&&
2249 m_object
->pager
->mo_pager_ops
== &shared_region_pager_ops
) {
2250 assert(!m_object
->internal
);
2251 OSAddAtomic(-1, &shared_region_pagers_resident_count
);
2253 #endif /* DEVELOPMENT || DEBUG */
2255 if (m_object
->internal
) {
2257 assert(vm_page_internal_count
);
2260 OSAddAtomic(-1, &vm_page_internal_count
);
2262 assert(vm_page_external_count
);
2263 OSAddAtomic(-1, &vm_page_external_count
);
2265 if (mem
->vmp_xpmapped
) {
2266 assert(vm_page_xpmapped_external_count
);
2267 OSAddAtomic(-1, &vm_page_xpmapped_external_count
);
2270 if (!m_object
->internal
&&
2271 m_object
->cached_list
.next
&&
2272 m_object
->cached_list
.prev
) {
2273 if (m_object
->resident_page_count
== 0) {
2274 vm_object_cache_remove(m_object
);
2278 if (VM_PAGE_WIRED(mem
)) {
2279 assert(mem
->vmp_wire_count
> 0);
2280 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object
);
2281 VM_OBJECT_WIRED_PAGE_REMOVE(m_object
, mem
);
2282 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object
, m_object
->wire_tag
);
2284 assert(m_object
->resident_page_count
>=
2285 m_object
->wired_page_count
);
2286 if (mem
->vmp_reusable
) {
2287 assert(m_object
->reusable_page_count
> 0);
2288 m_object
->reusable_page_count
--;
2289 assert(m_object
->reusable_page_count
<=
2290 m_object
->resident_page_count
);
2291 mem
->vmp_reusable
= FALSE
;
2292 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
2293 vm_page_stats_reusable
.reused_remove
++;
2294 } else if (m_object
->all_reusable
) {
2295 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
2296 vm_page_stats_reusable
.reused_remove
++;
2299 if (m_object
->purgable
== VM_PURGABLE_DENY
&&
2300 !m_object
->vo_ledger_tag
) {
2303 owner
= VM_OBJECT_OWNER(m_object
);
2304 vm_object_ledger_tag_ledgers(m_object
,
2305 &ledger_idx_volatile
,
2306 &ledger_idx_nonvolatile
,
2307 &ledger_idx_volatile_compressed
,
2308 &ledger_idx_nonvolatile_compressed
,
2312 (m_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
2313 m_object
->purgable
== VM_PURGABLE_DENY
||
2314 VM_PAGE_WIRED(mem
))) {
2315 /* less non-volatile bytes */
2316 ledger_debit(owner
->ledger
,
2317 ledger_idx_nonvolatile
,
2320 /* less footprint */
2321 ledger_debit(owner
->ledger
,
2322 task_ledgers
.phys_footprint
,
2326 (m_object
->purgable
== VM_PURGABLE_VOLATILE
||
2327 m_object
->purgable
== VM_PURGABLE_EMPTY
)) {
2328 assert(!VM_PAGE_WIRED(mem
));
2329 /* less volatile bytes */
2330 ledger_debit(owner
->ledger
,
2331 ledger_idx_volatile
,
2334 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
) {
2335 if (VM_PAGE_WIRED(mem
)) {
2336 assert(vm_page_purgeable_wired_count
> 0);
2337 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
2339 assert(vm_page_purgeable_count
> 0);
2340 OSAddAtomic(-1, &vm_page_purgeable_count
);
2344 if (m_object
->set_cache_attr
== TRUE
) {
2345 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem
), 0);
2348 mem
->vmp_tabled
= FALSE
;
2349 mem
->vmp_object
= 0;
2350 mem
->vmp_offset
= (vm_object_offset_t
) -1;
2357 * Returns the page associated with the object/offset
2358 * pair specified; if none is found, VM_PAGE_NULL is returned.
2360 * The object must be locked. No side effects.
2363 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
2365 #if DEBUG_VM_PAGE_LOOKUP
2369 uint64_t vpl_empty_obj
;
2370 uint64_t vpl_bucket_NULL
;
2371 uint64_t vpl_hit_hint
;
2372 uint64_t vpl_hit_hint_next
;
2373 uint64_t vpl_hit_hint_prev
;
2379 uint64_t vpl_fast_elapsed
;
2380 uint64_t vpl_slow_elapsed
;
2381 } vm_page_lookup_stats
__attribute__((aligned(8)));
2385 #define KDP_VM_PAGE_WALK_MAX 1000
2390 vm_object_offset_t offset
)
2393 int num_traversed
= 0;
2396 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2399 vm_page_queue_iterate(&object
->memq
, cur_page
, vmp_listq
) {
2400 if (cur_page
->vmp_offset
== offset
) {
2405 if (num_traversed
>= KDP_VM_PAGE_WALK_MAX
) {
2406 return VM_PAGE_NULL
;
2410 return VM_PAGE_NULL
;
2416 vm_object_offset_t offset
)
2419 vm_page_bucket_t
*bucket
;
2420 vm_page_queue_entry_t qe
;
2421 lck_spin_t
*bucket_lock
= NULL
;
2423 #if DEBUG_VM_PAGE_LOOKUP
2424 uint64_t start
, elapsed
;
2426 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_total
);
2428 vm_object_lock_assert_held(object
);
2429 assertf(page_aligned(offset
), "offset 0x%llx\n", offset
);
2431 if (object
->resident_page_count
== 0) {
2432 #if DEBUG_VM_PAGE_LOOKUP
2433 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_empty_obj
);
2435 return VM_PAGE_NULL
;
2438 mem
= object
->memq_hint
;
2440 if (mem
!= VM_PAGE_NULL
) {
2441 assert(VM_PAGE_OBJECT(mem
) == object
);
2443 if (mem
->vmp_offset
== offset
) {
2444 #if DEBUG_VM_PAGE_LOOKUP
2445 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_hit_hint
);
2449 qe
= (vm_page_queue_entry_t
)vm_page_queue_next(&mem
->vmp_listq
);
2451 if (!vm_page_queue_end(&object
->memq
, qe
)) {
2452 vm_page_t next_page
;
2454 next_page
= (vm_page_t
)((uintptr_t)qe
);
2455 assert(VM_PAGE_OBJECT(next_page
) == object
);
2457 if (next_page
->vmp_offset
== offset
) {
2458 object
->memq_hint
= next_page
; /* new hint */
2459 #if DEBUG_VM_PAGE_LOOKUP
2460 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_hit_hint_next
);
2465 qe
= (vm_page_queue_entry_t
)vm_page_queue_prev(&mem
->vmp_listq
);
2467 if (!vm_page_queue_end(&object
->memq
, qe
)) {
2468 vm_page_t prev_page
;
2470 prev_page
= (vm_page_t
)((uintptr_t)qe
);
2471 assert(VM_PAGE_OBJECT(prev_page
) == object
);
2473 if (prev_page
->vmp_offset
== offset
) {
2474 object
->memq_hint
= prev_page
; /* new hint */
2475 #if DEBUG_VM_PAGE_LOOKUP
2476 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_hit_hint_prev
);
2483 * Search the hash table for this object/offset pair
2485 hash_id
= vm_page_hash(object
, offset
);
2486 bucket
= &vm_page_buckets
[hash_id
];
2489 * since we hold the object lock, we are guaranteed that no
2490 * new pages can be inserted into this object... this in turn
2491 * guarantess that the page we're looking for can't exist
2492 * if the bucket it hashes to is currently NULL even when looked
2493 * at outside the scope of the hash bucket lock... this is a
2494 * really cheap optimiztion to avoid taking the lock
2496 if (!bucket
->page_list
) {
2497 #if DEBUG_VM_PAGE_LOOKUP
2498 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_bucket_NULL
);
2500 return VM_PAGE_NULL
;
2503 #if DEBUG_VM_PAGE_LOOKUP
2504 start
= mach_absolute_time();
2506 if (object
->resident_page_count
<= VM_PAGE_HASH_LOOKUP_THRESHOLD
) {
2508 * on average, it's roughly 3 times faster to run a short memq list
2509 * than to take the spin lock and go through the hash list
2511 mem
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
2513 while (!vm_page_queue_end(&object
->memq
, (vm_page_queue_entry_t
)mem
)) {
2514 if (mem
->vmp_offset
== offset
) {
2518 mem
= (vm_page_t
)vm_page_queue_next(&mem
->vmp_listq
);
2520 if (vm_page_queue_end(&object
->memq
, (vm_page_queue_entry_t
)mem
)) {
2524 vm_page_object_t packed_object
;
2526 packed_object
= VM_PAGE_PACK_OBJECT(object
);
2528 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
2530 lck_spin_lock_grp(bucket_lock
, &vm_page_lck_grp_bucket
);
2532 for (mem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
));
2533 mem
!= VM_PAGE_NULL
;
2534 mem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->vmp_next_m
))) {
2537 * we don't hold the page queue lock
2538 * so this check isn't safe to make
2542 if ((mem
->vmp_object
== packed_object
) && (mem
->vmp_offset
== offset
)) {
2546 lck_spin_unlock(bucket_lock
);
2549 #if DEBUG_VM_PAGE_LOOKUP
2550 elapsed
= mach_absolute_time() - start
;
2553 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_slow
);
2554 OSAddAtomic64(elapsed
, &vm_page_lookup_stats
.vpl_slow_elapsed
);
2556 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_fast
);
2557 OSAddAtomic64(elapsed
, &vm_page_lookup_stats
.vpl_fast_elapsed
);
2559 if (mem
!= VM_PAGE_NULL
) {
2560 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_hit
);
2562 OSAddAtomic64(1, &vm_page_lookup_stats
.vpl_miss
);
2565 if (mem
!= VM_PAGE_NULL
) {
2566 assert(VM_PAGE_OBJECT(mem
) == object
);
2568 object
->memq_hint
= mem
;
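/*
 * Lookup strategy recap: vm_page_lookup() first tries the object's
 * memq_hint (and its immediate neighbors), then falls back to a linear
 * memq walk for small objects (at most VM_PAGE_HASH_LOOKUP_THRESHOLD
 * resident pages), and only then takes the bucket spin lock and walks the
 * hash chain. A hedged caller sketch (illustrative only):
 *
 *	vm_object_lock(object);
 *	vm_page_t m = vm_page_lookup(object, offset);
 *	if (m != VM_PAGE_NULL) {
 *		// m is resident; the memq hint now points at it
 *	}
 *	vm_object_unlock(object);
 */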
2577  *	Move the given memory entry from its
2578  *	current object to the specified target object/offset.
2580  *	The object must be locked.
2585     vm_object_t new_object,
2586     vm_object_offset_t new_offset)
2588     boolean_t internal_to_external, external_to_internal;
2590     vm_object_t m_object;
2592     m_object = VM_PAGE_OBJECT(mem);
2594     assert(m_object != new_object);
2598      * Changes to mem->vmp_object require the page lock because
2599      * the pageout daemon uses that lock to get the object.
2601     vm_page_lockspin_queues();
2603     internal_to_external = FALSE;
2604     external_to_internal = FALSE;
2606     if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2608          * it's much easier to get the vm_page_pageable_xxx accounting correct
2609          * if we first move the page to the active queue... it's going to end
2610          * up there anyway, and we don't do vm_page_rename's frequently enough
2611          * for this to matter.
2613         vm_page_queues_remove(mem, FALSE);
2614         vm_page_activate(mem);
2616     if (VM_PAGE_PAGEABLE(mem)) {
2617         if (m_object->internal && !new_object->internal) {
2618             internal_to_external = TRUE;
2620         if (!m_object->internal && new_object->internal) {
2621             external_to_internal = TRUE;
2625     tag = m_object->wire_tag;
2626     vm_page_remove(mem, TRUE);
2627     vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2629     if (internal_to_external) {
2630         vm_page_pageable_internal_count--;
2631         vm_page_pageable_external_count++;
2632     } else if (external_to_internal) {
2633         vm_page_pageable_external_count--;
2634         vm_page_pageable_internal_count++;
2637     vm_page_unlock_queues();
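/*
 * A hedged sketch of the expected calling convention for vm_page_rename()
 * (illustrative only): the remove and re-insert above assume the caller
 * already holds exclusive locks on both the source and destination objects,
 * while the page queues lock is taken internally.
 *
 *	vm_object_lock(old_object);
 *	vm_object_lock(new_object);
 *	vm_page_rename(m, new_object, new_offset);
 *	vm_object_unlock(new_object);
 *	vm_object_unlock(old_object);
 */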
2643  *	Initialize the fields in a new page.
2644  *	This takes a structure with random values and initializes it
2645  *	so that it can be given to vm_page_release or vm_page_insert.
2659     if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2660         if (!(pmap_valid_page(phys_page))) {
2661             panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2667      * Initialize the fields of the vm_page. If adding any new fields to vm_page,
2668      * try to use initial values which match 0. This minimizes the number of writes
2669      * needed for boot-time initialization.
2671      * Kernel bzero() isn't an inline yet, so do it by hand for performance.
2673     assert(VM_PAGE_NOT_ON_Q == 0);
2674     assert(sizeof(*mem) % sizeof(uintptr_t) == 0);
2675     for (p = (uintptr_t *)(void *)mem, i = sizeof(*mem) / sizeof(uintptr_t); i != 0; --i) {
2678     mem->vmp_offset = (vm_object_offset_t)-1;
2679     mem->vmp_busy = TRUE;
2680     mem->vmp_lopage = lopage;
2682     VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2685      * we're leaving this turned off for now... currently pages
2686      * come off the free list and are either immediately dirtied/referenced
2687      * due to zero-fill or COW faults, or are used to read or write files...
2688      * in the file I/O case, the UPL mechanism takes care of clearing
2689      * the state of the HW ref/mod bits in a somewhat fragile way.
2690      * Since we may change the way this works in the future (to toughen it up),
2691      * I'm leaving this as a reminder of where these bits could get cleared
2695      * make sure both the h/w referenced and modified bits are
2696      * clear at this point... we are especially dependent on
2697      * not finding a 'stale' h/w modified in a number of spots
2698      * once this page goes back into use
2700     pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2705  *	vm_page_grab_fictitious:
2707  *	Remove a fictitious page from the free list.
2708  *	Returns VM_PAGE_NULL if there are no free pages.
2712 vm_page_grab_fictitious_common(ppnum_t phys_addr, boolean_t canwait)
2716     m = zalloc_flags(vm_page_zone, canwait ? Z_WAITOK : Z_NOWAIT);
2718         vm_page_init(m, phys_addr, FALSE);
2719         m->vmp_fictitious = TRUE;
2725 vm_page_grab_fictitious(boolean_t canwait)
2727     return vm_page_grab_fictitious_common(vm_page_fictitious_addr, canwait);
2734 vm_page_grab_guard(boolean_t canwait)
2737     page = vm_page_grab_fictitious_common(vm_page_guard_addr, canwait);
2739         OSAddAtomic(1, &vm_guard_count);
2746  *	vm_page_release_fictitious:
2748  *	Release a fictitious page to the zone pool
2751 vm_page_release_fictitious(
2754     assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2755     assert(m->vmp_fictitious);
2756     assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2757         VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2760     if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
2761         OSAddAtomic(-1, &vm_guard_count);
2764     zfree(vm_page_zone, m);
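/*
 * Fictitious and guard pages live in vm_page_zone rather than in managed
 * physical memory. A hedged lifecycle sketch (illustrative only):
 *
 *	vm_page_t m = vm_page_grab_fictitious(TRUE);   // may block in zalloc
 *	if (m != VM_PAGE_NULL) {
 *		// ... use m as a placeholder page ...
 *		vm_page_release_fictitious(m);          // returns it to the zone
 *	}
 */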
2770  *	Return true if it is not likely that a non-vm_privileged thread
2771  *	can get memory without blocking. Advisory only, since the
2772  *	situation may change under us.
2777     /* No locking, at worst we will fib. */
2778     return vm_page_free_count <= vm_page_free_reserved;
2781 boolean_t vm_darkwake_mode = FALSE;
2784  *	vm_update_darkwake_mode():
2786  *	Tells the VM that the system is in / out of darkwake.
2788  *	Today, the VM only lowers/raises the background queue target
2789  *	so as to favor consuming more/less background pages when
2790  *	darkwake is ON/OFF.
2792  *	We might need to do more things in the future.
2796 vm_update_darkwake_mode(boolean_t darkwake_mode)
2798     LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2800     vm_page_lockspin_queues();
2802     if (vm_darkwake_mode == darkwake_mode) {
2806         vm_page_unlock_queues();
2810     vm_darkwake_mode = darkwake_mode;
2812     if (vm_darkwake_mode == TRUE) {
2813 #if CONFIG_BACKGROUND_QUEUE
2815         /* save background target to restore later */
2816         vm_page_background_target_snapshot = vm_page_background_target;
2818         /* target is set to 0...no protection for background pages */
2819         vm_page_background_target = 0;
2821 #endif /* CONFIG_BACKGROUND_QUEUE */
2822     } else if (vm_darkwake_mode == FALSE) {
2823 #if CONFIG_BACKGROUND_QUEUE
2825         if (vm_page_background_target_snapshot) {
2826             vm_page_background_target = vm_page_background_target_snapshot;
2828 #endif /* CONFIG_BACKGROUND_QUEUE */
2830     vm_page_unlock_queues();
2833 #if CONFIG_BACKGROUND_QUEUE
2836 vm_page_update_background_state(vm_page_t mem
)
2838 if (vm_page_background_mode
== VM_PAGE_BG_DISABLED
) {
2842 if (mem
->vmp_in_background
== FALSE
) {
2846 task_t my_task
= current_task();
2849 if (task_get_darkwake_mode(my_task
)) {
2854 #if BACKGROUNDQ_BASED_ON_QOS
2855 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS
) <= THREAD_QOS_LEGACY
) {
2860 if (proc_get_effective_task_policy(my_task
, TASK_POLICY_DARWIN_BG
)) {
2865 vm_page_lockspin_queues();
2867 mem
->vmp_in_background
= FALSE
;
2868 vm_page_background_promoted_count
++;
2870 vm_page_remove_from_backgroundq(mem
);
2872 vm_page_unlock_queues();
2877 vm_page_assign_background_state(vm_page_t mem
)
2879 if (vm_page_background_mode
== VM_PAGE_BG_DISABLED
) {
2883 task_t my_task
= current_task();
2886 if (task_get_darkwake_mode(my_task
)) {
2887 mem
->vmp_in_background
= TRUE
;
2892 #if BACKGROUNDQ_BASED_ON_QOS
2893 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS
) <= THREAD_QOS_LEGACY
) {
2894 mem
->vmp_in_background
= TRUE
;
2896 mem
->vmp_in_background
= FALSE
;
2900 mem
->vmp_in_background
= proc_get_effective_task_policy(my_task
, TASK_POLICY_DARWIN_BG
);
2907 vm_page_remove_from_backgroundq(
2910 vm_object_t m_object
;
2912 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2914 if (mem
->vmp_on_backgroundq
) {
2915 vm_page_queue_remove(&vm_page_queue_background
, mem
, vmp_backgroundq
);
2917 mem
->vmp_backgroundq
.next
= 0;
2918 mem
->vmp_backgroundq
.prev
= 0;
2919 mem
->vmp_on_backgroundq
= FALSE
;
2921 vm_page_background_count
--;
2923 m_object
= VM_PAGE_OBJECT(mem
);
2925 if (m_object
->internal
) {
2926 vm_page_background_internal_count
--;
2928 vm_page_background_external_count
--;
2931 assert(VM_PAGE_UNPACK_PTR(mem
->vmp_backgroundq
.next
) == (uintptr_t)NULL
&&
2932 VM_PAGE_UNPACK_PTR(mem
->vmp_backgroundq
.prev
) == (uintptr_t)NULL
);
2938 vm_page_add_to_backgroundq(
2942 vm_object_t m_object
;
2944 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2946 if (vm_page_background_mode
== VM_PAGE_BG_DISABLED
) {
2950 if (mem
->vmp_on_backgroundq
== FALSE
) {
2951 m_object
= VM_PAGE_OBJECT(mem
);
2953 if (vm_page_background_exclude_external
&& !m_object
->internal
) {
2957 if (first
== TRUE
) {
2958 vm_page_queue_enter_first(&vm_page_queue_background
, mem
, vmp_backgroundq
);
2960 vm_page_queue_enter(&vm_page_queue_background
, mem
, vmp_backgroundq
);
2962 mem
->vmp_on_backgroundq
= TRUE
;
2964 vm_page_background_count
++;
2966 if (m_object
->internal
) {
2967 vm_page_background_internal_count
++;
2969 vm_page_background_external_count
++;
2974 #endif /* CONFIG_BACKGROUND_QUEUE */
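/*
 * Background-queue policy recap: a freshly grabbed page is tagged as
 * "background" when the grabbing task is darkwake-eligible or is running
 * with the DARWIN_BG task policy, and pages promoted out of that state are
 * pulled off vm_page_queue_background. A hedged sketch of the default check
 * used above (illustrative only):
 *
 *	mem->vmp_in_background =
 *	    proc_get_effective_task_policy(current_task(), TASK_POLICY_DARWIN_BG);
 */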
2977  * This can be switched to FALSE to help debug drivers
2978  * that are having problems with memory > 4G.
2980 boolean_t vm_himemory_mode = TRUE;
2983  * this interface exists to support hardware controllers
2984  * incapable of generating DMAs with more than 32 bits
2985  * of address on platforms with physical memory > 4G...
2987 unsigned int vm_lopages_allocated_q = 0;
2988 unsigned int vm_lopages_allocated_cpm_success = 0;
2989 unsigned int vm_lopages_allocated_cpm_failed = 0;
2990 vm_page_queue_head_t vm_lopage_queue_free VM_PAGE_PACKED_ALIGNED;
2993 vm_page_grablo(void)
2997     if (vm_lopage_needed == FALSE) {
2998         return vm_page_grab();
3001     lck_mtx_lock_spin(&vm_page_queue_free_lock);
3003     if (!vm_page_queue_empty(&vm_lopage_queue_free)) {
3004         vm_page_queue_remove_first(&vm_lopage_queue_free, mem, vmp_pageq);
3005         assert(vm_lopage_free_count);
3006         assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
3007         mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3009         vm_lopage_free_count--;
3010         vm_lopages_allocated_q++;
3012         if (vm_lopage_free_count < vm_lopage_lowater) {
3013             vm_lopage_refill = TRUE;
3016         lck_mtx_unlock(&vm_page_queue_free_lock);
3018 #if CONFIG_BACKGROUND_QUEUE
3019         vm_page_assign_background_state(mem);
3022         lck_mtx_unlock(&vm_page_queue_free_lock);
3024         if (cpm_allocate(PAGE_SIZE, &mem, atop(PPNUM_MAX), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
3025             lck_mtx_lock_spin(&vm_page_queue_free_lock);
3026             vm_lopages_allocated_cpm_failed++;
3027             lck_mtx_unlock(&vm_page_queue_free_lock);
3029             return VM_PAGE_NULL;
3031         assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3033         mem->vmp_busy = TRUE;
3035         vm_page_lockspin_queues();
3037         mem->vmp_gobbled = FALSE;
3038         vm_page_gobble_count--;
3039         vm_page_wire_count--;
3041         vm_lopages_allocated_cpm_success++;
3042         vm_page_unlock_queues();
3044     assert(mem->vmp_busy);
3045     assert(!mem->vmp_pmapped);
3046     assert(!mem->vmp_wpmapped);
3047     assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3049     VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3051     counter_inc(&vm_page_grab_count);
3052     VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
3060 * first try to grab a page from the per-cpu free list...
3061 * this must be done while pre-emption is disabled... if
3062 * a page is available, we're done...
3063 * if no page is available, grab the vm_page_queue_free_lock
3064 * and see if current number of free pages would allow us
3065 * to grab at least 1... if not, return VM_PAGE_NULL as before...
3066 * if there are pages available, disable preemption and
3067 * recheck the state of the per-cpu free list... we could
3068 * have been preempted and moved to a different cpu, or
3069 * some other thread could have re-filled it... if still
3070 * empty, figure out how many pages we can steal from the
3071 * global free queue and move to the per-cpu queue...
3072 * return 1 of these pages when done... only wakeup the
3073 * pageout_scan thread if we moved pages from the global
3074 * list... no need for the wakeup if we've satisfied the
3075 * request from the per-cpu queue.
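/*
 * A hedged sketch of the fast path described above (illustrative only and
 * heavily simplified; the real code below also handles hibernation checks,
 * diagnostics, and the secluded/background queues):
 *
 *	disable_preemption();
 *	if ((mem = *PERCPU_GET(free_pages))) {
 *		*PERCPU_GET(free_pages) = mem->vmp_snext;   // pop per-cpu list
 *		enable_preemption();
 *		return mem;
 *	}
 *	enable_preemption();
 *	// slow path: refill the per-cpu list from the global free queue
 */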
3078 #if CONFIG_SECLUDED_MEMORY
3079 vm_page_t
vm_page_grab_secluded(void);
3080 #endif /* CONFIG_SECLUDED_MEMORY */
3083 vm_page_grab_diags(void);
3088 return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE
);
3092 boolean_t hibernate_rebuild_needed
= FALSE
;
3093 #endif /* HIBERNATION */
3096 vm_page_grab_options(
3101 disable_preemption();
3103 if ((mem
= *PERCPU_GET(free_pages
))) {
3104 return_page_from_cpu_list
:
3105 assert(mem
->vmp_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
);
3108 if (hibernate_rebuild_needed
) {
3109 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__
, __LINE__
);
3111 #endif /* HIBERNATION */
3113 vm_page_grab_diags();
3115 vm_offset_t pcpu_base
= current_percpu_base();
3116 counter_inc_preemption_disabled(&vm_page_grab_count
);
3117 *PERCPU_GET_WITH_BASE(pcpu_base
, free_pages
) = mem
->vmp_snext
;
3118 VM_DEBUG_EVENT(vm_page_grab
, VM_PAGE_GRAB
, DBG_FUNC_NONE
, grab_options
, 0, 0, 0);
3120 enable_preemption();
3121 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
3122 mem
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
3124 assert(mem
->vmp_listq
.next
== 0 && mem
->vmp_listq
.prev
== 0);
3125 assert(mem
->vmp_tabled
== FALSE
);
3126 assert(mem
->vmp_object
== 0);
3127 assert(!mem
->vmp_laundry
);
3128 ASSERT_PMAP_FREE(mem
);
3129 assert(mem
->vmp_busy
);
3130 assert(!mem
->vmp_pmapped
);
3131 assert(!mem
->vmp_wpmapped
);
3132 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
)));
3134 #if CONFIG_BACKGROUND_QUEUE
3135 vm_page_assign_background_state(mem
);
3139 enable_preemption();
3143 * Optionally produce warnings if the wire or gobble
3144 * counts exceed some threshold.
3146 #if VM_PAGE_WIRE_COUNT_WARNING
3147 if (vm_page_wire_count
>= VM_PAGE_WIRE_COUNT_WARNING
) {
3148 printf("mk: vm_page_grab(): high wired page count of %d\n",
3149 vm_page_wire_count
);
3152 #if VM_PAGE_GOBBLE_COUNT_WARNING
3153 if (vm_page_gobble_count
>= VM_PAGE_GOBBLE_COUNT_WARNING
) {
3154 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
3155 vm_page_gobble_count
);
3160 * If free count is low and we have delayed pages from early boot,
3161 * get one of those instead.
3163 if (__improbable(vm_delayed_count
> 0 &&
3164 vm_page_free_count
<= vm_page_free_target
&&
3165 (mem
= vm_get_delayed_page(grab_options
)) != NULL
)) {
3169 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
3172 * Only let privileged threads (involved in pageout)
3173 * dip into the reserved pool.
3175 if ((vm_page_free_count
< vm_page_free_reserved
) &&
3176 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
3177 /* no page for us in the free queue... */
3178 lck_mtx_unlock(&vm_page_queue_free_lock
);
3181 #if CONFIG_SECLUDED_MEMORY
3182 /* ... but can we try and grab from the secluded queue? */
3183 if (vm_page_secluded_count
> 0 &&
3184 ((grab_options
& VM_PAGE_GRAB_SECLUDED
) ||
3185 task_can_use_secluded_mem(current_task(), TRUE
))) {
3186 mem
= vm_page_grab_secluded();
3187 if (grab_options
& VM_PAGE_GRAB_SECLUDED
) {
3188 vm_page_secluded
.grab_for_iokit
++;
3190 vm_page_secluded
.grab_for_iokit_success
++;
3194 VM_CHECK_MEMORYSTATUS
;
3196 vm_page_grab_diags();
3197 counter_inc(&vm_page_grab_count
);
3198 VM_DEBUG_EVENT(vm_page_grab
, VM_PAGE_GRAB
, DBG_FUNC_NONE
, grab_options
, 0, 0, 0);
3203 #else /* CONFIG_SECLUDED_MEMORY */
3204 (void) grab_options
;
3205 #endif /* CONFIG_SECLUDED_MEMORY */
3209 unsigned int pages_to_steal
;
3211 unsigned int clump_end
, sub_count
;
3213 while (vm_page_free_count
== 0) {
3214 lck_mtx_unlock(&vm_page_queue_free_lock
);
3216 * must be a privileged thread to be
3217 * in this state since a non-privileged
3218 * thread would have bailed if we were
3219 * under the vm_page_free_reserved mark
3222 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
3225 disable_preemption();
3227 if ((mem
= *PERCPU_GET(free_pages
))) {
3228 lck_mtx_unlock(&vm_page_queue_free_lock
);
3231 * we got preempted and moved to another processor
3232 * or we got preempted and someone else ran and filled the cache
3234 goto return_page_from_cpu_list
;
3236 if (vm_page_free_count
<= vm_page_free_reserved
) {
3239 if (vm_free_magazine_refill_limit
<= (vm_page_free_count
- vm_page_free_reserved
)) {
3240 pages_to_steal
= vm_free_magazine_refill_limit
;
3242 pages_to_steal
= (vm_page_free_count
- vm_page_free_reserved
);
3245 color
= *PERCPU_GET(start_color
);
3248 vm_page_free_count
-= pages_to_steal
;
3249 clump_end
= sub_count
= 0;
3251 while (pages_to_steal
--) {
3252 while (vm_page_queue_empty(&vm_page_queue_free
[color
].qhead
)) {
3253 color
= (color
+ 1) & vm_color_mask
;
3255 #if defined(__x86_64__)
3256 vm_page_queue_remove_first_with_clump(&vm_page_queue_free
[color
].qhead
,
3259 vm_page_queue_remove_first(&vm_page_queue_free
[color
].qhead
,
3263 assert(mem
->vmp_q_state
== VM_PAGE_ON_FREE_Q
);
3265 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
3267 #if defined(__arm__) || defined(__arm64__)
3268 color
= (color
+ 1) & vm_color_mask
;
3271 #if DEVELOPMENT || DEBUG
3275 vm_clump_update_stats(sub_count
);
3277 color
= (color
+ 1) & vm_color_mask
;
3281 color
= (color
+ 1) & vm_color_mask
;
3284 #endif /* if DEVELOPMENT || DEBUG */
3286 #endif /* if defined(__arm__) || defined(__arm64__) */
3291 tail
->vmp_snext
= mem
;
3295 assert(mem
->vmp_listq
.next
== 0 && mem
->vmp_listq
.prev
== 0);
3296 assert(mem
->vmp_tabled
== FALSE
);
3297 assert(mem
->vmp_object
== 0);
3298 assert(!mem
->vmp_laundry
);
3300 mem
->vmp_q_state
= VM_PAGE_ON_FREE_LOCAL_Q
;
3302 ASSERT_PMAP_FREE(mem
);
3303 assert(mem
->vmp_busy
);
3304 assert(!mem
->vmp_pmapped
);
3305 assert(!mem
->vmp_wpmapped
);
3306 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
)));
3308 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
3309 vm_clump_update_stats(sub_count
);
3311 lck_mtx_unlock(&vm_page_queue_free_lock
);
3314 if (hibernate_rebuild_needed
) {
3315 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__
, __LINE__
);
3317 #endif /* HIBERNATION */
3318 vm_offset_t pcpu_base
= current_percpu_base();
3319 *PERCPU_GET_WITH_BASE(pcpu_base
, free_pages
) = head
->vmp_snext
;
3320 *PERCPU_GET_WITH_BASE(pcpu_base
, start_color
) = color
;
3323 * satisfy this request
3325 vm_page_grab_diags();
3326 counter_inc_preemption_disabled(&vm_page_grab_count
);
3327 VM_DEBUG_EVENT(vm_page_grab
, VM_PAGE_GRAB
, DBG_FUNC_NONE
, grab_options
, 0, 0, 0);
3329 assert(mem
->vmp_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
);
3331 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
3332 mem
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
3334 enable_preemption();
3337 * Decide if we should poke the pageout daemon.
3338 * We do this if the free count is less than the low
3339 * water mark. VM Pageout Scan will keep running till
3340 * the free_count > free_target (& hence above free_min).
3341 * This wakeup is to catch the possibility of the counts
3342 * dropping between VM Pageout Scan parking and this check.
3344 * We don't have the counts locked ... if they change a little,
3345 * it doesn't really matter.
3347 if (vm_page_free_count
< vm_page_free_min
) {
3348 lck_mtx_lock(&vm_page_queue_free_lock
);
3349 if (vm_pageout_running
== FALSE
) {
3350 lck_mtx_unlock(&vm_page_queue_free_lock
);
3351 thread_wakeup((event_t
) &vm_page_free_wanted
);
3353 lck_mtx_unlock(&vm_page_queue_free_lock
);
3357 VM_CHECK_MEMORYSTATUS
;
3360 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3362 #if CONFIG_BACKGROUND_QUEUE
3363 vm_page_assign_background_state(mem
);
3369 #if CONFIG_SECLUDED_MEMORY
3371 vm_page_grab_secluded(void)
3377 if (vm_page_secluded_count
== 0) {
3378 /* no secluded pages to grab... */
3379 return VM_PAGE_NULL
;
3382 /* secluded queue is protected by the VM page queue lock */
3383 vm_page_lock_queues();
3385 if (vm_page_secluded_count
== 0) {
3386 /* no secluded pages to grab... */
3387 vm_page_unlock_queues();
3388 return VM_PAGE_NULL
;
3392 /* can we grab from the secluded queue? */
3393 if (vm_page_secluded_count
> vm_page_secluded_target
||
3394 (vm_page_secluded_count
> 0 &&
3395 task_can_use_secluded_mem(current_task(), TRUE
))) {
3398 /* can't grab from secluded queue... */
3399 vm_page_unlock_queues();
3400 return VM_PAGE_NULL
;
3404 /* we can grab a page from secluded queue! */
3405 assert((vm_page_secluded_count_free
+
3406 vm_page_secluded_count_inuse
) ==
3407 vm_page_secluded_count
);
3408 if (current_task()->task_can_use_secluded_mem
) {
3409 assert(num_tasks_can_use_secluded_mem
> 0);
3411 assert(!vm_page_queue_empty(&vm_page_queue_secluded
));
3412 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3413 mem
= (vm_page_t
)vm_page_queue_first(&vm_page_queue_secluded
);
3414 assert(mem
->vmp_q_state
== VM_PAGE_ON_SECLUDED_Q
);
3415 vm_page_queues_remove(mem
, TRUE
);
3417 object
= VM_PAGE_OBJECT(mem
);
3419 assert(!mem
->vmp_fictitious
);
3420 assert(!VM_PAGE_WIRED(mem
));
3421 if (object
== VM_OBJECT_NULL
) {
3422 /* free for grab! */
3423 vm_page_unlock_queues();
3424 vm_page_secluded
.grab_success_free
++;
3426 assert(mem
->vmp_busy
);
3427 assert(mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
3428 assert(VM_PAGE_OBJECT(mem
) == VM_OBJECT_NULL
);
3429 assert(mem
->vmp_pageq
.next
== 0);
3430 assert(mem
->vmp_pageq
.prev
== 0);
3431 assert(mem
->vmp_listq
.next
== 0);
3432 assert(mem
->vmp_listq
.prev
== 0);
3433 #if CONFIG_BACKGROUND_QUEUE
3434 assert(mem
->vmp_on_backgroundq
== 0);
3435 assert(mem
->vmp_backgroundq
.next
== 0);
3436 assert(mem
->vmp_backgroundq
.prev
== 0);
3437 #endif /* CONFIG_BACKGROUND_QUEUE */
3441 assert(!object
->internal
);
3442 // vm_page_pageable_external_count--;
3444 if (!vm_object_lock_try(object
)) {
3445 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
3446 vm_page_secluded
.grab_failure_locked
++;
3447 reactivate_secluded_page
:
3448 vm_page_activate(mem
);
3449 vm_page_unlock_queues();
3450 return VM_PAGE_NULL
;
3452 if (mem
->vmp_busy
||
3453 mem
->vmp_cleaning
||
3455 /* can't steal page in this state... */
3456 vm_object_unlock(object
);
3457 vm_page_secluded
.grab_failure_state
++;
3458 goto reactivate_secluded_page
;
3461 mem
->vmp_busy
= TRUE
;
3462 refmod_state
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem
));
3463 if (refmod_state
& VM_MEM_REFERENCED
) {
3464 mem
->vmp_reference
= TRUE
;
3466 if (refmod_state
& VM_MEM_MODIFIED
) {
3467 SET_PAGE_DIRTY(mem
, FALSE
);
3469 if (mem
->vmp_dirty
|| mem
->vmp_precious
) {
3470 /* can't grab a dirty page; re-activate */
3471 // printf("SECLUDED: dirty page %p\n", mem);
3472 PAGE_WAKEUP_DONE(mem
);
3473 vm_page_secluded
.grab_failure_dirty
++;
3474 vm_object_unlock(object
);
3475 goto reactivate_secluded_page
;
3477 if (mem
->vmp_reference
) {
3478 /* it's been used but we do need to grab a page... */
3481 vm_page_unlock_queues();
3483 /* finish what vm_page_free() would have done... */
3484 vm_page_free_prepare_object(mem
, TRUE
);
3485 vm_object_unlock(object
);
3486 object
= VM_OBJECT_NULL
;
3487 if (vm_page_free_verify
) {
3488 ASSERT_PMAP_FREE(mem
);
3490 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
));
3491 vm_page_secluded
.grab_success_other
++;
3493 assert(mem
->vmp_busy
);
3494 assert(mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
3495 assert(VM_PAGE_OBJECT(mem
) == VM_OBJECT_NULL
);
3496 assert(mem
->vmp_pageq
.next
== 0);
3497 assert(mem
->vmp_pageq
.prev
== 0);
3498 assert(mem
->vmp_listq
.next
== 0);
3499 assert(mem
->vmp_listq
.prev
== 0);
3500 #if CONFIG_BACKGROUND_QUEUE
3501 assert(mem
->vmp_on_backgroundq
== 0);
3502 assert(mem
->vmp_backgroundq
.next
== 0);
3503 assert(mem
->vmp_backgroundq
.prev
== 0);
3504 #endif /* CONFIG_BACKGROUND_QUEUE */
3510 vm_page_secluded_drain(void)
3512 vm_page_t local_freeq
;
3514 uint64_t num_reclaimed
;
3515 unsigned int saved_secluded_count
, saved_secluded_target
;
3521 vm_page_lock_queues();
3523 saved_secluded_count
= vm_page_secluded_count
;
3524 saved_secluded_target
= vm_page_secluded_target
;
3525 vm_page_secluded_target
= 0;
3526 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3527 while (vm_page_secluded_count
) {
3528 vm_page_t secluded_page
;
3530 assert((vm_page_secluded_count_free
+
3531 vm_page_secluded_count_inuse
) ==
3532 vm_page_secluded_count
);
3533 secluded_page
= (vm_page_t
)vm_page_queue_first(&vm_page_queue_secluded
);
3534 assert(secluded_page
->vmp_q_state
== VM_PAGE_ON_SECLUDED_Q
);
3536 vm_page_queues_remove(secluded_page
, FALSE
);
3537 assert(!secluded_page
->vmp_fictitious
);
3538 assert(!VM_PAGE_WIRED(secluded_page
));
3540 if (secluded_page
->vmp_object
== 0) {
3541 /* transfer to free queue */
3542 assert(secluded_page
->vmp_busy
);
3543 secluded_page
->vmp_snext
= local_freeq
;
3544 local_freeq
= secluded_page
;
3547 /* transfer to head of active queue */
3548 vm_page_enqueue_active(secluded_page
, FALSE
);
3549 secluded_page
= VM_PAGE_NULL
;
3553 vm_page_secluded_target
= saved_secluded_target
;
3554 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3556 // printf("FBDP %s:%d secluded_count %d->%d, target %d, reclaimed %lld\n", __FUNCTION__, __LINE__, saved_secluded_count, vm_page_secluded_count, vm_page_secluded_target, num_reclaimed);
3558 vm_page_unlock_queues();
3561 vm_page_free_list(local_freeq
, TRUE
);
3566 return num_reclaimed
;
3568 #endif /* CONFIG_SECLUDED_MEMORY */
3572 vm_page_grab_diags()
3574 #if DEVELOPMENT || DEBUG
3575 task_t task
= current_task();
3580 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed
, 1);
3581 #endif /* DEVELOPMENT || DEBUG */
3587 * Return a page to the free list.
3593 boolean_t page_queues_locked
)
3596 int need_wakeup
= 0;
3597 int need_priv_wakeup
= 0;
3598 #if CONFIG_SECLUDED_MEMORY
3599 int need_secluded_wakeup
= 0;
3600 #endif /* CONFIG_SECLUDED_MEMORY */
3601 event_t wakeup_event
= NULL
;
3603 if (page_queues_locked
) {
3604 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3606 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_NOTOWNED
);
3609 assert(!mem
->vmp_private
&& !mem
->vmp_fictitious
);
3610 if (vm_page_free_verify
) {
3611 ASSERT_PMAP_FREE(mem
);
3613 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3615 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
));
3617 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
3619 assert(mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
3620 assert(mem
->vmp_busy
);
3621 assert(!mem
->vmp_laundry
);
3622 assert(mem
->vmp_object
== 0);
3623 assert(mem
->vmp_pageq
.next
== 0 && mem
->vmp_pageq
.prev
== 0);
3624 assert(mem
->vmp_listq
.next
== 0 && mem
->vmp_listq
.prev
== 0);
3625 #if CONFIG_BACKGROUND_QUEUE
3626 assert(mem
->vmp_backgroundq
.next
== 0 &&
3627 mem
->vmp_backgroundq
.prev
== 0 &&
3628 mem
->vmp_on_backgroundq
== FALSE
);
3630 if ((mem
->vmp_lopage
== TRUE
|| vm_lopage_refill
== TRUE
) &&
3631 vm_lopage_free_count
< vm_lopage_free_limit
&&
3632 VM_PAGE_GET_PHYS_PAGE(mem
) < max_valid_low_ppnum
) {
3634 * this exists to support hardware controllers
3635 * incapable of generating DMAs with more than 32 bits
3636 * of address on platforms with physical memory > 4G...
3638 vm_page_queue_enter_first(&vm_lopage_queue_free
, mem
, vmp_pageq
);
3639 vm_lopage_free_count
++;
3641 if (vm_lopage_free_count
>= vm_lopage_free_limit
) {
3642 vm_lopage_refill
= FALSE
;
3645 mem
->vmp_q_state
= VM_PAGE_ON_FREE_LOPAGE_Q
;
3646 mem
->vmp_lopage
= TRUE
;
3647 #if CONFIG_SECLUDED_MEMORY
3648 } else if (vm_page_free_count
> vm_page_free_reserved
&&
3649 vm_page_secluded_count
< vm_page_secluded_target
&&
3650 num_tasks_can_use_secluded_mem
== 0) {
3652 * XXX FBDP TODO: also avoid refilling secluded queue
3653 * when some IOKit objects are already grabbing from it...
3655 if (!page_queues_locked
) {
3656 if (!vm_page_trylock_queues()) {
3657 /* take locks in right order */
3658 lck_mtx_unlock(&vm_page_queue_free_lock
);
3659 vm_page_lock_queues();
3660 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
3663 mem
->vmp_lopage
= FALSE
;
3664 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3665 vm_page_queue_enter_first(&vm_page_queue_secluded
, mem
, vmp_pageq
);
3666 mem
->vmp_q_state
= VM_PAGE_ON_SECLUDED_Q
;
3667 vm_page_secluded_count
++;
3668 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3669 vm_page_secluded_count_free
++;
3670 if (!page_queues_locked
) {
3671 vm_page_unlock_queues();
3673 LCK_MTX_ASSERT(&vm_page_queue_free_lock
, LCK_MTX_ASSERT_OWNED
);
3674 if (vm_page_free_wanted_secluded
> 0) {
3675 vm_page_free_wanted_secluded
--;
3676 need_secluded_wakeup
= 1;
3678 #endif /* CONFIG_SECLUDED_MEMORY */
3680 mem
->vmp_lopage
= FALSE
;
3681 mem
->vmp_q_state
= VM_PAGE_ON_FREE_Q
;
3683 color
= VM_PAGE_GET_COLOR(mem
);
3684 #if defined(__x86_64__)
3685 vm_page_queue_enter_clump(&vm_page_queue_free
[color
].qhead
, mem
);
3687 vm_page_queue_enter(&vm_page_queue_free
[color
].qhead
, mem
, vmp_pageq
);
3689 vm_page_free_count
++;
3691 * Check if we should wake up someone waiting for page.
3692 * But don't bother waking them unless they can allocate.
3694 * We wakeup only one thread, to prevent starvation.
3695 * Because the scheduling system handles wait queues FIFO,
3696 * if we wakeup all waiting threads, one greedy thread
3697 * can starve multiple niceguy threads. When the threads
3698 * all wakeup, the greedy threads runs first, grabs the page,
3699 * and waits for another page. It will be the first to run
3700 * when the next page is freed.
3702 * However, there is a slight danger here.
3703 * The thread we wake might not use the free page.
3704 * Then the other threads could wait indefinitely
3705 * while the page goes unused. To forestall this,
3706 * the pageout daemon will keep making free pages
3707 * as long as vm_page_free_wanted is non-zero.
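/*
 * A hedged recap of the wakeup ordering implemented just below
 * (illustrative only): privileged waiters are served first, then (when
 * configured) secluded waiters, then ordinary waiters, and exactly one
 * waiter is woken per freed page.
 *
 *	if (vm_page_free_wanted_privileged > 0)             // wake a privileged waiter
 *	else if (vm_page_free_wanted_secluded > 0 && ...)   // wake a secluded waiter
 *	else if (vm_page_free_wanted > 0 && ...)            // wake a normal waiter
 */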
3710 assert(vm_page_free_count
> 0);
3711 if (vm_page_free_wanted_privileged
> 0) {
3712 vm_page_free_wanted_privileged
--;
3713 need_priv_wakeup
= 1;
3714 #if CONFIG_SECLUDED_MEMORY
3715 } else if (vm_page_free_wanted_secluded
> 0 &&
3716 vm_page_free_count
> vm_page_free_reserved
) {
3717 vm_page_free_wanted_secluded
--;
3718 need_secluded_wakeup
= 1;
3719 #endif /* CONFIG_SECLUDED_MEMORY */
3720 } else if (vm_page_free_wanted
> 0 &&
3721 vm_page_free_count
> vm_page_free_reserved
) {
3722 vm_page_free_wanted
--;
3726 vm_pageout_vminfo
.vm_page_pages_freed
++;
3728 VM_DEBUG_CONSTANT_EVENT(vm_page_release
, VM_PAGE_RELEASE
, DBG_FUNC_NONE
, 1, 0, 0, 0);
3730 lck_mtx_unlock(&vm_page_queue_free_lock
);
3732 if (need_priv_wakeup
) {
3733 wakeup_event
= &vm_page_free_wanted_privileged
;
3735 #if CONFIG_SECLUDED_MEMORY
3736 else if (need_secluded_wakeup
) {
3737 wakeup_event
= &vm_page_free_wanted_secluded
;
3739 #endif /* CONFIG_SECLUDED_MEMORY */
3740 else if (need_wakeup
) {
3741 wakeup_event
= &vm_page_free_count
;
3745 if (vps_dynamic_priority_enabled
== TRUE
) {
3746 thread_t thread_woken
= NULL
;
3747 wakeup_one_with_inheritor((event_t
) wakeup_event
, THREAD_AWAKENED
, LCK_WAKE_DO_NOT_TRANSFER_PUSH
, &thread_woken
);
3748 thread_deallocate(thread_woken
);
3750 thread_wakeup_one((event_t
) wakeup_event
);
3754 VM_CHECK_MEMORYSTATUS
;
3758 * This version of vm_page_release() is used only at startup
3759 * when we are single-threaded and pages are being released
3760 * for the first time. Hence, no locking or unnecessary checks are made.
3761 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3764 vm_page_release_startup(
3767 vm_page_queue_t queue_free
;
3769 if (vm_lopage_free_count
< vm_lopage_free_limit
&&
3770 VM_PAGE_GET_PHYS_PAGE(mem
) < max_valid_low_ppnum
) {
3771 mem
->vmp_lopage
= TRUE
;
3772 mem
->vmp_q_state
= VM_PAGE_ON_FREE_LOPAGE_Q
;
3773 vm_lopage_free_count
++;
3774 queue_free
= &vm_lopage_queue_free
;
3775 #if CONFIG_SECLUDED_MEMORY
3776 } else if (vm_page_secluded_count
< vm_page_secluded_target
) {
3777 mem
->vmp_lopage
= FALSE
;
3778 mem
->vmp_q_state
= VM_PAGE_ON_SECLUDED_Q
;
3779 vm_page_secluded_count
++;
3780 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3781 vm_page_secluded_count_free
++;
3782 queue_free
= &vm_page_queue_secluded
;
3783 #endif /* CONFIG_SECLUDED_MEMORY */
3785 mem
->vmp_lopage
= FALSE
;
3786 mem
->vmp_q_state
= VM_PAGE_ON_FREE_Q
;
3787 vm_page_free_count
++;
3788 queue_free
= &vm_page_queue_free
[VM_PAGE_GET_COLOR(mem
)].qhead
;
3790 if (mem
->vmp_q_state
== VM_PAGE_ON_FREE_Q
) {
3791 #if defined(__x86_64__)
3792 vm_page_queue_enter_clump(queue_free
, mem
);
3794 vm_page_queue_enter(queue_free
, mem
, vmp_pageq
);
3797 vm_page_queue_enter_first(queue_free
, mem
, vmp_pageq
);
3804  *	Wait for a page to become available.
3805  *	If there are plenty of free pages, then we don't sleep.
3808  *		TRUE:  There may be another page, try again
3809  *		FALSE: We were interrupted out of our wait, don't try again
3817      *	We can't use vm_page_free_reserved to make this
3818      *	determination.  Consider: some thread might
3819      *	need to allocate two pages.  The first allocation
3820      *	succeeds, the second fails.  After the first page is freed,
3821      *	a call to vm_page_wait must really block.
3823     kern_return_t wait_result;
3824     int need_wakeup = 0;
3825     int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3826     event_t wait_event = NULL;
3828     lck_mtx_lock_spin(&vm_page_queue_free_lock);
3830     if (is_privileged && vm_page_free_count) {
3831         lck_mtx_unlock(&vm_page_queue_free_lock);
3835     if (vm_page_free_count >= vm_page_free_target) {
3836         lck_mtx_unlock(&vm_page_queue_free_lock);
3840     if (is_privileged) {
3841         if (vm_page_free_wanted_privileged++ == 0) {
3844         wait_event = (event_t)&vm_page_free_wanted_privileged;
3845 #if CONFIG_SECLUDED_MEMORY
3846     } else if (secluded_for_apps &&
3847         task_can_use_secluded_mem(current_task(), FALSE)) {
3849         /* XXX FBDP: need pageq lock for this... */
3850         /* XXX FBDP: might wait even if pages available, */
3851         /* XXX FBDP: hopefully not for too long... */
3852         if (vm_page_secluded_count > 0) {
3853             lck_mtx_unlock(&vm_page_queue_free_lock);
3857         if (vm_page_free_wanted_secluded++ == 0) {
3860         wait_event = (event_t)&vm_page_free_wanted_secluded;
3861 #endif /* CONFIG_SECLUDED_MEMORY */
3863         if (vm_page_free_wanted++ == 0) {
3866         wait_event = (event_t)&vm_page_free_count;
3870      * We don't do a vm_pageout_scan wakeup if we already have
3871      * some waiters because vm_pageout_scan checks for waiters
3872      * before it returns and does so behind the vm_page_queue_free_lock,
3873      * which we own when we bump the waiter counts.
3876     if (vps_dynamic_priority_enabled == TRUE) {
3878          * We are waking up vm_pageout_scan here. If it needs
3879          * the vm_page_queue_free_lock before we unlock it
3880          * we'll end up just blocking and incur an extra
3881          * context switch. Could be a perf. issue.
3885             thread_wakeup((event_t)&vm_page_free_wanted);
3889          * LD: This event is going to get recorded every time because
3890          * we don't get back THREAD_WAITING from lck_mtx_sleep_with_inheritor.
3891          * We just block in that routine.
3893         VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3894             vm_page_free_wanted_privileged,
3895             vm_page_free_wanted,
3896 #if CONFIG_SECLUDED_MEMORY
3897             vm_page_free_wanted_secluded,
3898 #else /* CONFIG_SECLUDED_MEMORY */
3900 #endif /* CONFIG_SECLUDED_MEMORY */
3902         wait_result = lck_mtx_sleep_with_inheritor(&vm_page_queue_free_lock,
3905             vm_pageout_scan_thread,
3909         wait_result = assert_wait(wait_event, interruptible);
3911         lck_mtx_unlock(&vm_page_queue_free_lock);
3914             thread_wakeup((event_t)&vm_page_free_wanted);
3917         if (wait_result == THREAD_WAITING) {
3918             VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3919                 vm_page_free_wanted_privileged,
3920                 vm_page_free_wanted,
3921 #if CONFIG_SECLUDED_MEMORY
3922                 vm_page_free_wanted_secluded,
3923 #else /* CONFIG_SECLUDED_MEMORY */
3925 #endif /* CONFIG_SECLUDED_MEMORY */
3927             wait_result = thread_block(THREAD_CONTINUE_NULL);
3928             VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3929                 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3933     return (wait_result == THREAD_AWAKENED) || (wait_result == THREAD_NOT_WAITING);
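/*
 * The usual retry loop built on top of vm_page_wait() (a hedged sketch,
 * illustrative only; the error handling is caller-specific):
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(THREAD_UNINT)) {
 *			return KERN_ABORTED;    // interrupted out of the wait
 *		}
 *	}
 */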
3939  *	Allocate and return a memory cell associated
3940  *	with this VM object/offset pair.
3942  *	Object must be locked.
3948     vm_object_offset_t offset)
3953     vm_object_lock_assert_exclusive(object);
3955 #if CONFIG_SECLUDED_MEMORY
3956     if (object->can_grab_secluded) {
3957         grab_options |= VM_PAGE_GRAB_SECLUDED;
3959 #endif /* CONFIG_SECLUDED_MEMORY */
3960     mem = vm_page_grab_options(grab_options);
3961     if (mem == VM_PAGE_NULL) {
3962         return VM_PAGE_NULL;
3965     vm_page_insert(mem, object, offset);
3971 * vm_page_free_prepare:
3973 * Removes page from any queue it may be on
3974 * and disassociates it from its VM object.
3976 * Object and page queues must be locked prior to entry.
3979 vm_page_free_prepare(
3982 vm_page_free_prepare_queues(mem
);
3983 vm_page_free_prepare_object(mem
, TRUE
);
3988 vm_page_free_prepare_queues(
3991 vm_object_t m_object
;
3995 assert(mem
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
);
3996 assert(!mem
->vmp_cleaning
);
3997 m_object
= VM_PAGE_OBJECT(mem
);
3999 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4001 vm_object_lock_assert_exclusive(m_object
);
4003 if (mem
->vmp_laundry
) {
4005 * We may have to free a page while it's being laundered
4006 * if we lost its pager (due to a forced unmount, for example).
4007 * We need to call vm_pageout_steal_laundry() before removing
4008 * the page from its VM object, so that we can remove it
4009 * from its pageout queue and adjust the laundry accounting
4011 vm_pageout_steal_laundry(mem
, TRUE
);
4014 vm_page_queues_remove(mem
, TRUE
);
4016 if (VM_PAGE_WIRED(mem
)) {
4017 assert(mem
->vmp_wire_count
> 0);
4020 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object
);
4021 VM_OBJECT_WIRED_PAGE_REMOVE(m_object
, mem
);
4022 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object
, m_object
->wire_tag
);
4024 assert(m_object
->resident_page_count
>=
4025 m_object
->wired_page_count
);
4027 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
) {
4028 OSAddAtomic(+1, &vm_page_purgeable_count
);
4029 assert(vm_page_purgeable_wired_count
> 0);
4030 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
4032 if ((m_object
->purgable
== VM_PURGABLE_VOLATILE
||
4033 m_object
->purgable
== VM_PURGABLE_EMPTY
) &&
4034 m_object
->vo_owner
!= TASK_NULL
) {
4036 int ledger_idx_volatile
;
4037 int ledger_idx_nonvolatile
;
4038 int ledger_idx_volatile_compressed
;
4039 int ledger_idx_nonvolatile_compressed
;
4040 boolean_t do_footprint
;
4042 owner
= VM_OBJECT_OWNER(m_object
);
4043 vm_object_ledger_tag_ledgers(
4045 &ledger_idx_volatile
,
4046 &ledger_idx_nonvolatile
,
4047 &ledger_idx_volatile_compressed
,
4048 &ledger_idx_nonvolatile_compressed
,
4051 * While wired, this page was accounted
4052 * as "non-volatile" but it should now
4053 * be accounted as "volatile".
4055 /* one less "non-volatile"... */
4056 ledger_debit(owner
->ledger
,
4057 ledger_idx_nonvolatile
,
4060 /* ... and "phys_footprint" */
4061 ledger_debit(owner
->ledger
,
4062 task_ledgers
.phys_footprint
,
4065 /* one more "volatile" */
4066 ledger_credit(owner
->ledger
,
4067 ledger_idx_volatile
,
4071 if (!mem
->vmp_private
&& !mem
->vmp_fictitious
) {
4072 vm_page_wire_count
--;
4075 mem
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
4076 mem
->vmp_wire_count
= 0;
4077 assert(!mem
->vmp_gobbled
);
4078 } else if (mem
->vmp_gobbled
) {
4079 if (!mem
->vmp_private
&& !mem
->vmp_fictitious
) {
4080 vm_page_wire_count
--;
4082 vm_page_gobble_count
--;
4088 vm_page_free_prepare_object(
4090 boolean_t remove_from_hash
)
4092 if (mem
->vmp_tabled
) {
4093 vm_page_remove(mem
, remove_from_hash
); /* clears tabled, object, offset */
4095 PAGE_WAKEUP(mem
); /* clears wanted */
4097 if (mem
->vmp_private
) {
4098 mem
->vmp_private
= FALSE
;
4099 mem
->vmp_fictitious
= TRUE
;
4100 VM_PAGE_SET_PHYS_PAGE(mem
, vm_page_fictitious_addr
);
4102 if (!mem
->vmp_fictitious
) {
4103 assert(mem
->vmp_pageq
.next
== 0);
4104 assert(mem
->vmp_pageq
.prev
== 0);
4105 assert(mem
->vmp_listq
.next
== 0);
4106 assert(mem
->vmp_listq
.prev
== 0);
4107 #if CONFIG_BACKGROUND_QUEUE
4108 assert(mem
->vmp_backgroundq
.next
== 0);
4109 assert(mem
->vmp_backgroundq
.prev
== 0);
4110 #endif /* CONFIG_BACKGROUND_QUEUE */
4111 assert(mem
->vmp_next_m
== 0);
4112 ASSERT_PMAP_FREE(mem
);
4113 vm_page_init(mem
, VM_PAGE_GET_PHYS_PAGE(mem
), mem
->vmp_lopage
);
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t       mem)
{
	vm_page_free_prepare(mem);

	if (mem->vmp_fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem,
		    TRUE);      /* page queues are locked */
	}
}
void
vm_page_free_unlocked(
	vm_page_t       mem,
	boolean_t       remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->vmp_fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem, FALSE); /* page queues are not locked */
	}
}
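/*
 * Illustrative sketch (an assumed calling pattern, not code from this file):
 * a typical caller of vm_page_free_list() below chains its pages through
 * vmp_snext and frees them in a single batch, e.g.:
 *
 *	vm_page_t head = VM_PAGE_NULL;
 *
 *	while ((p = <next page to discard>) != VM_PAGE_NULL) {
 *		p->vmp_snext = head;
 *		head = p;
 *	}
 *	vm_page_free_list(head, TRUE);	// prepare_object == TRUE
 *
 * vm_page_free_list() then peels that chain apart in chunks of 64 so the
 * global free-queue lock is taken once per chunk rather than once per page.
 */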
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.
 *
 *	The VM page queues lock (vm_page_queue_lock) should NOT be held.
 *	The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
 */
void
vm_page_free_list(
	vm_page_t       freeq,
	boolean_t       prepare_object)
{
	vm_page_t       mem;
	vm_page_t       nxt;
	vm_page_t       local_freeq;
	int             pg_count;

	while (freeq) {
		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {
			assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
			    (mem->vmp_q_state == VM_PAGE_IS_WIRED));
#if CONFIG_BACKGROUND_QUEUE
			assert(mem->vmp_backgroundq.next == 0 &&
			    mem->vmp_backgroundq.prev == 0 &&
			    mem->vmp_on_backgroundq == FALSE);
#endif
			nxt = mem->vmp_snext;
			mem->vmp_snext = NULL;
			assert(mem->vmp_pageq.prev == 0);

			if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
				ASSERT_PMAP_FREE(mem);
			}
			if (prepare_object == TRUE) {
				vm_page_free_prepare_object(mem, TRUE);
			}

			if (!mem->vmp_fictitious) {
				assert(mem->vmp_busy);

				if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
					vm_page_release(mem, FALSE); /* page queues are not locked */
#if CONFIG_SECLUDED_MEMORY
				} else if (vm_page_secluded_count < vm_page_secluded_target &&
				    num_tasks_can_use_secluded_mem == 0) {
					vm_page_release(mem,
					    FALSE); /* page queues are not locked */
#endif /* CONFIG_SECLUDED_MEMORY */
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock). That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->vmp_snext = local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
				}
			} else {
				assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
				    VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ((mem = local_freeq)) {
			unsigned int    avail_free_count;
			unsigned int    need_wakeup = 0;
			unsigned int    need_priv_wakeup = 0;
#if CONFIG_SECLUDED_MEMORY
			unsigned int    need_wakeup_secluded = 0;
#endif /* CONFIG_SECLUDED_MEMORY */
			event_t         priv_wakeup_event, secluded_wakeup_event, normal_wakeup_event;
			boolean_t       priv_wakeup_all, secluded_wakeup_all, normal_wakeup_all;

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {
				int     color;

				nxt = mem->vmp_snext;

				assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
				assert(mem->vmp_busy);
				mem->vmp_lopage = FALSE;
				mem->vmp_q_state = VM_PAGE_ON_FREE_Q;

				color = VM_PAGE_GET_COLOR(mem);
#if defined(__x86_64__)
				vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
#else
				vm_page_queue_enter(&vm_page_queue_free[color].qhead,
				    mem, vmp_pageq);
#endif
				mem = nxt;
			}
			vm_pageout_vminfo.vm_page_pages_freed += pg_count;
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					avail_free_count -= vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
				}
			}
#if CONFIG_SECLUDED_MEMORY
			if (vm_page_free_wanted_secluded > 0 &&
			    avail_free_count > vm_page_free_reserved) {
				unsigned int available_pages;
				available_pages = (avail_free_count -
				    vm_page_free_reserved);
				if (available_pages <
				    vm_page_free_wanted_secluded) {
					need_wakeup_secluded = available_pages;
					vm_page_free_wanted_secluded -=
					    available_pages;
					avail_free_count -= available_pages;
				} else {
					need_wakeup_secluded =
					    vm_page_free_wanted_secluded;
					avail_free_count -=
					    vm_page_free_wanted_secluded;
					vm_page_free_wanted_secluded = 0;
				}
			}
#endif /* CONFIG_SECLUDED_MEMORY */
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int  available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			priv_wakeup_event = NULL;
			secluded_wakeup_event = NULL;
			normal_wakeup_event = NULL;

			priv_wakeup_all = FALSE;
			secluded_wakeup_all = FALSE;
			normal_wakeup_all = FALSE;

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				priv_wakeup_event = (event_t)&vm_page_free_wanted_privileged;
				priv_wakeup_all = TRUE;
			}
#if CONFIG_SECLUDED_MEMORY
			if (need_wakeup_secluded != 0 &&
			    vm_page_free_wanted_secluded == 0) {
				secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
				secluded_wakeup_all = TRUE;
				need_wakeup_secluded = 0;
			} else if (need_wakeup_secluded != 0) {
				secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
			}
#endif /* CONFIG_SECLUDED_MEMORY */
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				normal_wakeup_event = (event_t) &vm_page_free_count;
				normal_wakeup_all = TRUE;
			} else if (need_wakeup != 0) {
				normal_wakeup_event = (event_t) &vm_page_free_count;
			}

			if (priv_wakeup_event ||
#if CONFIG_SECLUDED_MEMORY
			    secluded_wakeup_event ||
#endif /* CONFIG_SECLUDED_MEMORY */
			    normal_wakeup_event) {
				if (vps_dynamic_priority_enabled == TRUE) {
					thread_t thread_woken = NULL;

					if (priv_wakeup_all == TRUE) {
						wakeup_all_with_inheritor(priv_wakeup_event, THREAD_AWAKENED);
					}
#if CONFIG_SECLUDED_MEMORY
					if (secluded_wakeup_all == TRUE) {
						wakeup_all_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED);
					} else {
						while (need_wakeup_secluded-- != 0) {
							/*
							 * Wake up one waiter per page we just released.
							 */
							wakeup_one_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
							thread_deallocate(thread_woken);
						}
					}
#endif /* CONFIG_SECLUDED_MEMORY */
					if (normal_wakeup_all == TRUE) {
						wakeup_all_with_inheritor(normal_wakeup_event, THREAD_AWAKENED);
					} else {
						while (need_wakeup-- != 0) {
							/*
							 * Wake up one waiter per page we just released.
							 */
							wakeup_one_with_inheritor(normal_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
							thread_deallocate(thread_woken);
						}
					}
				} else {
					/*
					 * Non-priority-aware wakeups.
					 */
					if (priv_wakeup_all == TRUE) {
						thread_wakeup(priv_wakeup_event);
					}
#if CONFIG_SECLUDED_MEMORY
					if (secluded_wakeup_all == TRUE) {
						thread_wakeup(secluded_wakeup_event);
					} else {
						while (need_wakeup_secluded-- != 0) {
							/*
							 * Wake up one waiter per page we just released.
							 */
							thread_wakeup_one(secluded_wakeup_event);
						}
					}
#endif /* CONFIG_SECLUDED_MEMORY */
					if (normal_wakeup_all == TRUE) {
						thread_wakeup(normal_wakeup_event);
					} else {
						while (need_wakeup-- != 0) {
							/*
							 * Wake up one waiter per page we just released.
							 */
							thread_wakeup_one(normal_wakeup_event);
						}
					}
				}
			}
			VM_CHECK_MEMORYSTATUS;
		}
	}
}
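/*
 * Worked example of the waiter accounting above (numbers are illustrative
 * only): suppose a 64-page chunk brings vm_page_free_count to 120 with
 * vm_page_free_reserved == 100, while 2 VM-privileged waiters and 30
 * normal waiters are pending.  The privileged waiters are satisfied first
 * (avail 120 -> 118); normal waiters are only offered what exceeds the
 * reserve, i.e. 118 - 100 == 18 pages, so 18 one-at-a-time wakeups are
 * issued and vm_page_free_wanted drops from 30 to 12.
 */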
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	vm_page_t       mem,
	vm_tag_t        tag,
	boolean_t       check_memorystatus)
{
	vm_object_t     m_object;

	m_object = VM_PAGE_OBJECT(mem);

//	dbgLog(current_thread(), mem->vmp_offset, m_object, 1);	/* (TEST/DEBUG) */

	if (m_object) {
		vm_object_lock_assert_exclusive(m_object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (!VM_PAGE_WIRED(mem)) {
		if (mem->vmp_laundry) {
			vm_pageout_steal_laundry(mem, TRUE);
		}

		vm_page_queues_remove(mem, TRUE);

		assert(mem->vmp_wire_count == 0);
		mem->vmp_q_state = VM_PAGE_IS_WIRED;

		if (m_object) {
			VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
			VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
			VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);

			assert(m_object->resident_page_count >=
			    m_object->wired_page_count);
			if (m_object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
			    m_object->purgable == VM_PURGABLE_EMPTY) &&
			    m_object->vo_owner != TASK_NULL) {
				task_t          owner;
				int             ledger_idx_volatile;
				int             ledger_idx_nonvolatile;
				int             ledger_idx_volatile_compressed;
				int             ledger_idx_nonvolatile_compressed;
				boolean_t       do_footprint;

				owner = VM_OBJECT_OWNER(m_object);
				vm_object_ledger_tag_ledgers(
					m_object,
					&ledger_idx_volatile,
					&ledger_idx_nonvolatile,
					&ledger_idx_volatile_compressed,
					&ledger_idx_nonvolatile_compressed,
					&do_footprint);
				/* less volatile bytes */
				ledger_debit(owner->ledger,
				    ledger_idx_volatile,
				    PAGE_SIZE);
				/* more not-quite-volatile bytes */
				ledger_credit(owner->ledger,
				    ledger_idx_nonvolatile,
				    PAGE_SIZE);
				if (do_footprint) {
					/* more footprint */
					ledger_credit(owner->ledger,
					    task_ledgers.phys_footprint,
					    PAGE_SIZE);
				}
			}
			if (m_object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->vmp_reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(m_object,
				    mem->vmp_offset,
				    mem->vmp_offset + PAGE_SIZE_64,
				    FALSE);
			}
		}
		assert(!mem->vmp_reusable);

		if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled) {
			vm_page_wire_count++;
		}
		if (mem->vmp_gobbled) {
			vm_page_gobble_count--;
		}
		mem->vmp_gobbled = FALSE;

		if (check_memorystatus == TRUE) {
			VM_CHECK_MEMORYSTATUS;
		}
	} else {
		assert(!mem->vmp_gobbled);
		assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
		mem->vmp_wire_count++;
		if (__improbable(mem->vmp_wire_count == 0)) {
			panic("vm_page_wire(%p): wire_count overflow", mem);
		}
	}
}
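/*
 * Minimal usage sketch (an assumed calling pattern, not taken from this
 * file): wiring is reference counted per page, so every vm_page_wire()
 * must be balanced by a vm_page_unwire() issued under the same locks:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m, VM_KERN_MEMORY_NONE, TRUE);
 *	...
 *	vm_page_unwire(m, TRUE);	// queueit == TRUE: requeue when count hits 0
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */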
4590 * Release one wiring of this page, potentially
4591 * enabling it to be paged again.
4593 * The page's object and the page queues must be locked.
4600 vm_object_t m_object
;
4602 m_object
= VM_PAGE_OBJECT(mem
);
4604 // dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
4607 assert(VM_PAGE_WIRED(mem
));
4608 assert(mem
->vmp_wire_count
> 0);
4609 assert(!mem
->vmp_gobbled
);
4610 assert(m_object
!= VM_OBJECT_NULL
);
4611 vm_object_lock_assert_exclusive(m_object
);
4612 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4613 if (--mem
->vmp_wire_count
== 0) {
4614 mem
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
4616 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object
);
4617 VM_OBJECT_WIRED_PAGE_REMOVE(m_object
, mem
);
4618 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object
, m_object
->wire_tag
);
4619 if (!mem
->vmp_private
&& !mem
->vmp_fictitious
) {
4620 vm_page_wire_count
--;
4623 assert(m_object
->resident_page_count
>=
4624 m_object
->wired_page_count
);
4625 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
) {
4626 OSAddAtomic(+1, &vm_page_purgeable_count
);
4627 assert(vm_page_purgeable_wired_count
> 0);
4628 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
4630 if ((m_object
->purgable
== VM_PURGABLE_VOLATILE
||
4631 m_object
->purgable
== VM_PURGABLE_EMPTY
) &&
4632 m_object
->vo_owner
!= TASK_NULL
) {
4634 int ledger_idx_volatile
;
4635 int ledger_idx_nonvolatile
;
4636 int ledger_idx_volatile_compressed
;
4637 int ledger_idx_nonvolatile_compressed
;
4638 boolean_t do_footprint
;
4640 owner
= VM_OBJECT_OWNER(m_object
);
4641 vm_object_ledger_tag_ledgers(
4643 &ledger_idx_volatile
,
4644 &ledger_idx_nonvolatile
,
4645 &ledger_idx_volatile_compressed
,
4646 &ledger_idx_nonvolatile_compressed
,
4648 /* more volatile bytes */
4649 ledger_credit(owner
->ledger
,
4650 ledger_idx_volatile
,
4652 /* less not-quite-volatile bytes */
4653 ledger_debit(owner
->ledger
,
4654 ledger_idx_nonvolatile
,
4657 /* less footprint */
4658 ledger_debit(owner
->ledger
,
4659 task_ledgers
.phys_footprint
,
4663 assert(m_object
!= kernel_object
);
4664 assert(mem
->vmp_pageq
.next
== 0 && mem
->vmp_pageq
.prev
== 0);
4666 if (queueit
== TRUE
) {
4667 if (m_object
->purgable
== VM_PURGABLE_EMPTY
) {
4668 vm_page_deactivate(mem
);
4670 vm_page_activate(mem
);
4674 VM_CHECK_MEMORYSTATUS
;
4680 * vm_page_deactivate:
4682 * Returns the given page to the inactive list,
4683 * indicating that no physical maps have access
4684 * to this page. [Used by the physical mapping system.]
4686 * The page queues must be locked.
4692 vm_page_deactivate_internal(m
, TRUE
);
4697 vm_page_deactivate_internal(
4699 boolean_t clear_hw_reference
)
4701 vm_object_t m_object
;
4703 m_object
= VM_PAGE_OBJECT(m
);
4706 assert(m_object
!= kernel_object
);
4707 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4709 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4710 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4712 * This page is no longer very interesting. If it was
4713 * interesting (active or inactive/referenced), then we
4714 * clear the reference bit and (re)enter it in the
4715 * inactive queue. Note wired pages should not have
4716 * their reference bit cleared.
4718 assert( !(m
->vmp_absent
&& !m
->vmp_unusual
));
4720 if (m
->vmp_gobbled
) { /* can this happen? */
4721 assert( !VM_PAGE_WIRED(m
));
4723 if (!m
->vmp_private
&& !m
->vmp_fictitious
) {
4724 vm_page_wire_count
--;
4726 vm_page_gobble_count
--;
4727 m
->vmp_gobbled
= FALSE
;
4730 * if this page is currently on the pageout queue, we can't do the
4731 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4732 * and we can't remove it manually since we would need the object lock
4733 * (which is not required here) to decrement the activity_in_progress
4734 * reference which is held on the object while the page is in the pageout queue...
4735 * just let the normal laundry processing proceed
4737 if (m
->vmp_laundry
|| m
->vmp_private
|| m
->vmp_fictitious
||
4738 (m
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
4739 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
) ||
4743 if (!m
->vmp_absent
&& clear_hw_reference
== TRUE
) {
4744 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m
));
4747 m
->vmp_reference
= FALSE
;
4748 m
->vmp_no_cache
= FALSE
;
4750 if (!VM_PAGE_INACTIVE(m
)) {
4751 vm_page_queues_remove(m
, FALSE
);
4753 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4754 m
->vmp_dirty
&& m_object
->internal
&&
4755 (m_object
->purgable
== VM_PURGABLE_DENY
||
4756 m_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
4757 m_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
4758 vm_page_check_pageable_safe(m
);
4759 vm_page_queue_enter(&vm_page_queue_throttled
, m
, vmp_pageq
);
4760 m
->vmp_q_state
= VM_PAGE_ON_THROTTLED_Q
;
4761 vm_page_throttled_count
++;
4763 if (m_object
->named
&& m_object
->ref_count
== 1) {
4764 vm_page_speculate(m
, FALSE
);
4765 #if DEVELOPMENT || DEBUG
4766 vm_page_speculative_recreated
++;
4769 vm_page_enqueue_inactive(m
, FALSE
);
4776 * vm_page_enqueue_cleaned
4778 * Put the page on the cleaned queue, mark it cleaned, etc.
4779 * Being on the cleaned queue (and having m->clean_queue set)
4780 * does ** NOT ** guarantee that the page is clean!
4782 * Call with the queues lock held.
4786 vm_page_enqueue_cleaned(vm_page_t m
)
4788 vm_object_t m_object
;
4790 m_object
= VM_PAGE_OBJECT(m
);
4792 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4793 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4794 assert( !(m
->vmp_absent
&& !m
->vmp_unusual
));
4796 if (VM_PAGE_WIRED(m
)) {
4800 if (m
->vmp_gobbled
) {
4801 if (!m
->vmp_private
&& !m
->vmp_fictitious
) {
4802 vm_page_wire_count
--;
4804 vm_page_gobble_count
--;
4805 m
->vmp_gobbled
= FALSE
;
4808 * if this page is currently on the pageout queue, we can't do the
4809 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4810 * and we can't remove it manually since we would need the object lock
4811 * (which is not required here) to decrement the activity_in_progress
4812 * reference which is held on the object while the page is in the pageout queue...
4813 * just let the normal laundry processing proceed
4815 if (m
->vmp_laundry
|| m
->vmp_private
|| m
->vmp_fictitious
||
4816 (m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
) ||
4817 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
4820 vm_page_queues_remove(m
, FALSE
);
4822 vm_page_check_pageable_safe(m
);
4823 vm_page_queue_enter(&vm_page_queue_cleaned
, m
, vmp_pageq
);
4824 m
->vmp_q_state
= VM_PAGE_ON_INACTIVE_CLEANED_Q
;
4825 vm_page_cleaned_count
++;
4827 vm_page_inactive_count
++;
4828 if (m_object
->internal
) {
4829 vm_page_pageable_internal_count
++;
4831 vm_page_pageable_external_count
++;
4833 #if CONFIG_BACKGROUND_QUEUE
4834 if (m
->vmp_in_background
) {
4835 vm_page_add_to_backgroundq(m
, TRUE
);
4838 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned
, 1);
4844 * Put the specified page on the active list (if appropriate).
4846 * The page queues must be locked.
4853 vm_object_t m_object
;
4855 m_object
= VM_PAGE_OBJECT(m
);
4858 #ifdef FIXME_4778297
4859 assert(m_object
!= kernel_object
);
4861 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4862 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4863 assert( !(m
->vmp_absent
&& !m
->vmp_unusual
));
4865 if (m
->vmp_gobbled
) {
4866 assert( !VM_PAGE_WIRED(m
));
4867 if (!m
->vmp_private
&& !m
->vmp_fictitious
) {
4868 vm_page_wire_count
--;
4870 vm_page_gobble_count
--;
4871 m
->vmp_gobbled
= FALSE
;
4874 * if this page is currently on the pageout queue, we can't do the
4875 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4876 * and we can't remove it manually since we would need the object lock
4877 * (which is not required here) to decrement the activity_in_progress
4878 * reference which is held on the object while the page is in the pageout queue...
4879 * just let the normal laundry processing proceed
4881 if (m
->vmp_laundry
|| m
->vmp_private
|| m
->vmp_fictitious
||
4882 (m
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
4883 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
4888 if (m
->vmp_q_state
== VM_PAGE_ON_ACTIVE_Q
) {
4889 panic("vm_page_activate: already active");
4893 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
4894 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
4895 DTRACE_VM2(pgfrec
, int, 1, (uint64_t *), NULL
);
4898 vm_page_queues_remove(m
, FALSE
);
4900 if (!VM_PAGE_WIRED(m
)) {
4901 vm_page_check_pageable_safe(m
);
4902 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4903 m
->vmp_dirty
&& m_object
->internal
&&
4904 (m_object
->purgable
== VM_PURGABLE_DENY
||
4905 m_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
4906 m_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
4907 vm_page_queue_enter(&vm_page_queue_throttled
, m
, vmp_pageq
);
4908 m
->vmp_q_state
= VM_PAGE_ON_THROTTLED_Q
;
4909 vm_page_throttled_count
++;
4911 #if CONFIG_SECLUDED_MEMORY
4912 if (secluded_for_filecache
&&
4913 vm_page_secluded_target
!= 0 &&
4914 num_tasks_can_use_secluded_mem
== 0 &&
4915 m_object
->eligible_for_secluded
) {
4916 vm_page_queue_enter(&vm_page_queue_secluded
, m
, vmp_pageq
);
4917 m
->vmp_q_state
= VM_PAGE_ON_SECLUDED_Q
;
4918 vm_page_secluded_count
++;
4919 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
4920 vm_page_secluded_count_inuse
++;
4921 assert(!m_object
->internal
);
4922 // vm_page_pageable_external_count++;
4924 #endif /* CONFIG_SECLUDED_MEMORY */
4925 vm_page_enqueue_active(m
, FALSE
);
4927 m
->vmp_reference
= TRUE
;
4928 m
->vmp_no_cache
= FALSE
;
4935 * vm_page_speculate:
4937 * Put the specified page on the speculative list (if appropriate).
4939 * The page queues must be locked.
4946 struct vm_speculative_age_q
*aq
;
4947 vm_object_t m_object
;
4949 m_object
= VM_PAGE_OBJECT(m
);
4952 vm_page_check_pageable_safe(m
);
4954 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4955 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4956 assert( !(m
->vmp_absent
&& !m
->vmp_unusual
));
4957 assert(m_object
->internal
== FALSE
);
4960 * if this page is currently on the pageout queue, we can't do the
4961 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4962 * and we can't remove it manually since we would need the object lock
4963 * (which is not required here) to decrement the activity_in_progress
4964 * reference which is held on the object while the page is in the pageout queue...
4965 * just let the normal laundry processing proceed
4967 if (m
->vmp_laundry
|| m
->vmp_private
|| m
->vmp_fictitious
||
4968 (m
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
4969 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
4973 vm_page_queues_remove(m
, FALSE
);
4975 if (!VM_PAGE_WIRED(m
)) {
4980 clock_get_system_nanotime(&sec
, &nsec
);
4981 ts
.tv_sec
= (unsigned int) sec
;
4984 if (vm_page_speculative_count
== 0) {
4985 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4986 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4988 aq
= &vm_page_queue_speculative
[speculative_age_index
];
4991 * set the timer to begin a new group
4993 aq
->age_ts
.tv_sec
= vm_pageout_state
.vm_page_speculative_q_age_ms
/ 1000;
4994 aq
->age_ts
.tv_nsec
= (vm_pageout_state
.vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
4996 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
4998 aq
= &vm_page_queue_speculative
[speculative_age_index
];
5000 if (CMP_MACH_TIMESPEC(&ts
, &aq
->age_ts
) >= 0) {
5001 speculative_age_index
++;
5003 if (speculative_age_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
) {
5004 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
5006 if (speculative_age_index
== speculative_steal_index
) {
5007 speculative_steal_index
= speculative_age_index
+ 1;
5009 if (speculative_steal_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
) {
5010 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
5013 aq
= &vm_page_queue_speculative
[speculative_age_index
];
5015 if (!vm_page_queue_empty(&aq
->age_q
)) {
5016 vm_page_speculate_ageit(aq
);
5019 aq
->age_ts
.tv_sec
= vm_pageout_state
.vm_page_speculative_q_age_ms
/ 1000;
5020 aq
->age_ts
.tv_nsec
= (vm_pageout_state
.vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
5022 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
5025 vm_page_enqueue_tail(&aq
->age_q
, &m
->vmp_pageq
);
5026 m
->vmp_q_state
= VM_PAGE_ON_SPECULATIVE_Q
;
5027 vm_page_speculative_count
++;
5028 vm_page_pageable_external_count
++;
5031 vm_object_lock_assert_exclusive(m_object
);
5033 m_object
->pages_created
++;
5034 #if DEVELOPMENT || DEBUG
5035 vm_page_speculative_created
++;
5044 * move pages from the specified aging bin to
5045 * the speculative bin that pageout_scan claims from
5047 * The page queues must be locked.
5050 vm_page_speculate_ageit(struct vm_speculative_age_q
*aq
)
5052 struct vm_speculative_age_q
*sq
;
5055 sq
= &vm_page_queue_speculative
[VM_PAGE_SPECULATIVE_AGED_Q
];
5057 if (vm_page_queue_empty(&sq
->age_q
)) {
5058 sq
->age_q
.next
= aq
->age_q
.next
;
5059 sq
->age_q
.prev
= aq
->age_q
.prev
;
5061 t
= (vm_page_t
)VM_PAGE_UNPACK_PTR(sq
->age_q
.next
);
5062 t
->vmp_pageq
.prev
= VM_PAGE_PACK_PTR(&sq
->age_q
);
5064 t
= (vm_page_t
)VM_PAGE_UNPACK_PTR(sq
->age_q
.prev
);
5065 t
->vmp_pageq
.next
= VM_PAGE_PACK_PTR(&sq
->age_q
);
5067 t
= (vm_page_t
)VM_PAGE_UNPACK_PTR(sq
->age_q
.prev
);
5068 t
->vmp_pageq
.next
= aq
->age_q
.next
;
5070 t
= (vm_page_t
)VM_PAGE_UNPACK_PTR(aq
->age_q
.next
);
5071 t
->vmp_pageq
.prev
= sq
->age_q
.prev
;
5073 t
= (vm_page_t
)VM_PAGE_UNPACK_PTR(aq
->age_q
.prev
);
5074 t
->vmp_pageq
.next
= VM_PAGE_PACK_PTR(&sq
->age_q
);
5076 sq
->age_q
.prev
= aq
->age_q
.prev
;
5078 vm_page_queue_init(&aq
->age_q
);
5087 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
5088 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
5090 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
5092 if (m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
) {
5094 * we don't need to do all the other work that
5095 * vm_page_queues_remove and vm_page_enqueue_inactive
5096 * bring along for the ride
5098 assert(!m
->vmp_laundry
);
5099 assert(!m
->vmp_private
);
5101 m
->vmp_no_cache
= FALSE
;
5103 vm_page_queue_remove(&vm_page_queue_inactive
, m
, vmp_pageq
);
5104 vm_page_queue_enter(&vm_page_queue_inactive
, m
, vmp_pageq
);
5109 * if this page is currently on the pageout queue, we can't do the
5110 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5111 * and we can't remove it manually since we would need the object lock
5112 * (which is not required here) to decrement the activity_in_progress
5113 * reference which is held on the object while the page is in the pageout queue...
5114 * just let the normal laundry processing proceed
5116 if (m
->vmp_laundry
|| m
->vmp_private
||
5117 (m
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
5118 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
) ||
5123 m
->vmp_no_cache
= FALSE
;
5125 vm_page_queues_remove(m
, FALSE
);
5127 vm_page_enqueue_inactive(m
, FALSE
);
5132 vm_page_reactivate_all_throttled(void)
5134 vm_page_t first_throttled
, last_throttled
;
5135 vm_page_t first_active
;
5137 int extra_active_count
;
5138 int extra_internal_count
, extra_external_count
;
5139 vm_object_t m_object
;
5141 if (!VM_DYNAMIC_PAGING_ENABLED()) {
5145 extra_active_count
= 0;
5146 extra_internal_count
= 0;
5147 extra_external_count
= 0;
5148 vm_page_lock_queues();
5149 if (!vm_page_queue_empty(&vm_page_queue_throttled
)) {
5151 * Switch "throttled" pages to "active".
5153 vm_page_queue_iterate(&vm_page_queue_throttled
, m
, vmp_pageq
) {
5155 assert(m
->vmp_q_state
== VM_PAGE_ON_THROTTLED_Q
);
5157 m_object
= VM_PAGE_OBJECT(m
);
5159 extra_active_count
++;
5160 if (m_object
->internal
) {
5161 extra_internal_count
++;
5163 extra_external_count
++;
5166 m
->vmp_q_state
= VM_PAGE_ON_ACTIVE_Q
;
5168 #if CONFIG_BACKGROUND_QUEUE
5169 if (m
->vmp_in_background
) {
5170 vm_page_add_to_backgroundq(m
, FALSE
);
5176 * Transfer the entire throttled queue to a regular LRU page queues.
5177 * We insert it at the head of the active queue, so that these pages
5178 * get re-evaluated by the LRU algorithm first, since they've been
5179 * completely out of it until now.
5181 first_throttled
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_throttled
);
5182 last_throttled
= (vm_page_t
) vm_page_queue_last(&vm_page_queue_throttled
);
5183 first_active
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
5184 if (vm_page_queue_empty(&vm_page_queue_active
)) {
5185 vm_page_queue_active
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled
);
5187 first_active
->vmp_pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled
);
5189 vm_page_queue_active
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled
);
5190 first_throttled
->vmp_pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active
);
5191 last_throttled
->vmp_pageq
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active
);
5194 printf("reactivated %d throttled pages\n", vm_page_throttled_count
);
5196 vm_page_queue_init(&vm_page_queue_throttled
);
5198 * Adjust the global page counts.
5200 vm_page_active_count
+= extra_active_count
;
5201 vm_page_pageable_internal_count
+= extra_internal_count
;
5202 vm_page_pageable_external_count
+= extra_external_count
;
5203 vm_page_throttled_count
= 0;
5205 assert(vm_page_throttled_count
== 0);
5206 assert(vm_page_queue_empty(&vm_page_queue_throttled
));
5207 vm_page_unlock_queues();
5212 * move pages from the indicated local queue to the global active queue
5213 * its ok to fail if we're below the hard limit and force == FALSE
5214 * the nolocks == TRUE case is to allow this function to be run on
5215 * the hibernate path
5219 vm_page_reactivate_local(uint32_t lid
, boolean_t force
, boolean_t nolocks
)
5222 vm_page_t first_local
, last_local
;
5223 vm_page_t first_active
;
5227 if (vm_page_local_q
== NULL
) {
5231 lq
= zpercpu_get_cpu(vm_page_local_q
, lid
);
5233 if (nolocks
== FALSE
) {
5234 if (lq
->vpl_count
< vm_page_local_q_hard_limit
&& force
== FALSE
) {
5235 if (!vm_page_trylockspin_queues()) {
5239 vm_page_lockspin_queues();
5242 VPL_LOCK(&lq
->vpl_lock
);
5244 if (lq
->vpl_count
) {
5246 * Switch "local" pages to "active".
5248 assert(!vm_page_queue_empty(&lq
->vpl_queue
));
5250 vm_page_queue_iterate(&lq
->vpl_queue
, m
, vmp_pageq
) {
5252 vm_page_check_pageable_safe(m
);
5253 assert(m
->vmp_q_state
== VM_PAGE_ON_ACTIVE_LOCAL_Q
);
5254 assert(!m
->vmp_fictitious
);
5256 if (m
->vmp_local_id
!= lid
) {
5257 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m
);
5260 m
->vmp_local_id
= 0;
5261 m
->vmp_q_state
= VM_PAGE_ON_ACTIVE_Q
;
5263 #if CONFIG_BACKGROUND_QUEUE
5264 if (m
->vmp_in_background
) {
5265 vm_page_add_to_backgroundq(m
, FALSE
);
5270 if (count
!= lq
->vpl_count
) {
5271 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count
, lq
->vpl_count
);
5275 * Transfer the entire local queue to a regular LRU page queues.
5277 first_local
= (vm_page_t
) vm_page_queue_first(&lq
->vpl_queue
);
5278 last_local
= (vm_page_t
) vm_page_queue_last(&lq
->vpl_queue
);
5279 first_active
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
5281 if (vm_page_queue_empty(&vm_page_queue_active
)) {
5282 vm_page_queue_active
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
5284 first_active
->vmp_pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
5286 vm_page_queue_active
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local
);
5287 first_local
->vmp_pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active
);
5288 last_local
->vmp_pageq
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active
);
5290 vm_page_queue_init(&lq
->vpl_queue
);
5292 * Adjust the global page counts.
5294 vm_page_active_count
+= lq
->vpl_count
;
5295 vm_page_pageable_internal_count
+= lq
->vpl_internal_count
;
5296 vm_page_pageable_external_count
+= lq
->vpl_external_count
;
5298 lq
->vpl_internal_count
= 0;
5299 lq
->vpl_external_count
= 0;
5301 assert(vm_page_queue_empty(&lq
->vpl_queue
));
5303 if (nolocks
== FALSE
) {
5304 VPL_UNLOCK(&lq
->vpl_lock
);
5306 vm_page_balance_inactive(count
/ 4);
5307 vm_page_unlock_queues();
5312 * vm_page_part_zero_fill:
5314 * Zero-fill a part of the page.
5316 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
5318 vm_page_part_zero_fill(
5325 * we don't hold the page queue lock
5326 * so this check isn't safe to make
5331 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
5332 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m
), m_pa
, len
);
5336 tmp
= vm_page_grab();
5337 if (tmp
== VM_PAGE_NULL
) {
5338 vm_page_wait(THREAD_UNINT
);
5343 vm_page_zero_fill(tmp
);
5345 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
5347 if ((m_pa
+ len
) < PAGE_SIZE
) {
5348 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
5349 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
5351 vm_page_copy(tmp
, m
);
5357 * vm_page_zero_fill:
5359 * Zero-fill the specified page.
5367 * we don't hold the page queue lock
5368 * so this check isn't safe to make
5373 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
5374 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
5378 * vm_page_part_copy:
5380 * copy part of one page to another
5393 * we don't hold the page queue lock
5394 * so this check isn't safe to make
5396 VM_PAGE_CHECK(src_m
);
5397 VM_PAGE_CHECK(dst_m
);
5399 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m
), src_pa
,
5400 VM_PAGE_GET_PHYS_PAGE(dst_m
), dst_pa
, len
);
5406 * Copy one page to another
5409 int vm_page_copy_cs_validations
= 0;
5410 int vm_page_copy_cs_tainted
= 0;
5417 vm_object_t src_m_object
;
5419 src_m_object
= VM_PAGE_OBJECT(src_m
);
5423 * we don't hold the page queue lock
5424 * so this check isn't safe to make
5426 VM_PAGE_CHECK(src_m
);
5427 VM_PAGE_CHECK(dest_m
);
5429 vm_object_lock_assert_held(src_m_object
);
5431 if (src_m_object
!= VM_OBJECT_NULL
&&
5432 src_m_object
->code_signed
) {
5434 * We're copying a page from a code-signed object.
5435 * Whoever ends up mapping the copy page might care about
5436 * the original page's integrity, so let's validate the
5439 vm_page_copy_cs_validations
++;
5440 vm_page_validate_cs(src_m
, PAGE_SIZE
, 0);
5441 #if DEVELOPMENT || DEBUG
5442 DTRACE_VM4(codesigned_copy
,
5443 vm_object_t
, src_m_object
,
5444 vm_object_offset_t
, src_m
->vmp_offset
,
5445 int, src_m
->vmp_cs_validated
,
5446 int, src_m
->vmp_cs_tainted
);
5447 #endif /* DEVELOPMENT || DEBUG */
5451 * Propagate the cs_tainted bit to the copy page. Do not propagate
5452 * the cs_validated bit.
5454 dest_m
->vmp_cs_tainted
= src_m
->vmp_cs_tainted
;
5455 dest_m
->vmp_cs_nx
= src_m
->vmp_cs_nx
;
5456 if (dest_m
->vmp_cs_tainted
) {
5457 vm_page_copy_cs_tainted
++;
5459 dest_m
->vmp_error
= src_m
->vmp_error
; /* sliding src_m might have failed... */
5460 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m
), VM_PAGE_GET_PHYS_PAGE(dest_m
));
5468 printf("vm_page %p: \n", p
);
5469 printf(" pageq: next=%p prev=%p\n",
5470 (vm_page_t
)VM_PAGE_UNPACK_PTR(p
->vmp_pageq
.next
),
5471 (vm_page_t
)VM_PAGE_UNPACK_PTR(p
->vmp_pageq
.prev
));
5472 printf(" listq: next=%p prev=%p\n",
5473 (vm_page_t
)(VM_PAGE_UNPACK_PTR(p
->vmp_listq
.next
)),
5474 (vm_page_t
)(VM_PAGE_UNPACK_PTR(p
->vmp_listq
.prev
)));
5475 printf(" next=%p\n", (vm_page_t
)(VM_PAGE_UNPACK_PTR(p
->vmp_next_m
)));
5476 printf(" object=%p offset=0x%llx\n", VM_PAGE_OBJECT(p
), p
->vmp_offset
);
5477 printf(" wire_count=%u\n", p
->vmp_wire_count
);
5478 printf(" q_state=%u\n", p
->vmp_q_state
);
5480 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
5481 (p
->vmp_laundry
? "" : "!"),
5482 (p
->vmp_reference
? "" : "!"),
5483 (p
->vmp_gobbled
? "" : "!"),
5484 (p
->vmp_private
? "" : "!"));
5485 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
5486 (p
->vmp_busy
? "" : "!"),
5487 (p
->vmp_wanted
? "" : "!"),
5488 (p
->vmp_tabled
? "" : "!"),
5489 (p
->vmp_fictitious
? "" : "!"),
5490 (p
->vmp_pmapped
? "" : "!"),
5491 (p
->vmp_wpmapped
? "" : "!"));
5492 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
5493 (p
->vmp_free_when_done
? "" : "!"),
5494 (p
->vmp_absent
? "" : "!"),
5495 (p
->vmp_error
? "" : "!"),
5496 (p
->vmp_dirty
? "" : "!"),
5497 (p
->vmp_cleaning
? "" : "!"),
5498 (p
->vmp_precious
? "" : "!"),
5499 (p
->vmp_clustered
? "" : "!"));
5500 printf(" %soverwriting, %srestart, %sunusual\n",
5501 (p
->vmp_overwriting
? "" : "!"),
5502 (p
->vmp_restart
? "" : "!"),
5503 (p
->vmp_unusual
? "" : "!"));
5504 printf(" cs_validated=%d, cs_tainted=%d, cs_nx=%d, %sno_cache\n",
5505 p
->vmp_cs_validated
,
5508 (p
->vmp_no_cache
? "" : "!"));
5510 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p
));
5514 * Check that the list of pages is ordered by
5515 * ascending physical address and has no holes.
5518 vm_page_verify_contiguous(
5520 unsigned int npages
)
5523 unsigned int page_count
;
5524 vm_offset_t prev_addr
;
5526 prev_addr
= VM_PAGE_GET_PHYS_PAGE(pages
);
5528 for (m
= NEXT_PAGE(pages
); m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
5529 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5530 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
5531 m
, (long)prev_addr
, VM_PAGE_GET_PHYS_PAGE(m
));
5532 printf("pages %p page_count %d npages %d\n", pages
, page_count
, npages
);
5533 panic("vm_page_verify_contiguous: not contiguous!");
5535 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5538 if (page_count
!= npages
) {
5539 printf("pages %p actual count 0x%x but requested 0x%x\n",
5540 pages
, page_count
, npages
);
5541 panic("vm_page_verify_contiguous: count error");
5548 * Check the free lists for proper length etc.
5550 static boolean_t vm_page_verify_this_free_list_enabled
= FALSE
;
5552 vm_page_verify_free_list(
5553 vm_page_queue_head_t
*vm_page_queue
,
5555 vm_page_t look_for_page
,
5556 boolean_t expect_page
)
5558 unsigned int npages
;
5561 boolean_t found_page
;
5563 if (!vm_page_verify_this_free_list_enabled
) {
5569 prev_m
= (vm_page_t
)((uintptr_t)vm_page_queue
);
5571 vm_page_queue_iterate(vm_page_queue
, m
, vmp_pageq
) {
5572 if (m
== look_for_page
) {
5575 if ((vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.prev
) != prev_m
) {
5576 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
5577 color
, npages
, m
, (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.prev
), prev_m
);
5580 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
5583 if (color
!= (unsigned int) -1) {
5584 if (VM_PAGE_GET_COLOR(m
) != color
) {
5585 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
5586 color
, npages
, m
, VM_PAGE_GET_COLOR(m
), color
);
5588 if (m
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
) {
5589 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
5590 color
, npages
, m
, m
->vmp_q_state
);
5593 if (m
->vmp_q_state
!= VM_PAGE_ON_FREE_LOCAL_Q
) {
5594 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
5595 npages
, m
, m
->vmp_q_state
);
5601 if (look_for_page
!= VM_PAGE_NULL
) {
5602 unsigned int other_color
;
5604 if (expect_page
&& !found_page
) {
5605 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
5606 color
, npages
, look_for_page
, VM_PAGE_GET_PHYS_PAGE(look_for_page
));
5607 _vm_page_print(look_for_page
);
5608 for (other_color
= 0;
5609 other_color
< vm_colors
;
5611 if (other_color
== color
) {
5614 vm_page_verify_free_list(&vm_page_queue_free
[other_color
].qhead
,
5615 other_color
, look_for_page
, FALSE
);
5617 if (color
== (unsigned int) -1) {
5618 vm_page_verify_free_list(&vm_lopage_queue_free
,
5619 (unsigned int) -1, look_for_page
, FALSE
);
5621 panic("vm_page_verify_free_list(color=%u)\n", color
);
5623 if (!expect_page
&& found_page
) {
5624 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
5625 color
, npages
, look_for_page
, VM_PAGE_GET_PHYS_PAGE(look_for_page
));
5631 static boolean_t vm_page_verify_all_free_lists_enabled
= FALSE
;
5633 vm_page_verify_free_lists( void )
5635 unsigned int color
, npages
, nlopages
;
5636 boolean_t toggle
= TRUE
;
5638 if (!vm_page_verify_all_free_lists_enabled
) {
5644 lck_mtx_lock(&vm_page_queue_free_lock
);
5646 if (vm_page_verify_this_free_list_enabled
== TRUE
) {
5648 * This variable has been set globally for extra checking of
5649 * each free list Q. Since we didn't set it, we don't own it
5650 * and we shouldn't toggle it.
5655 if (toggle
== TRUE
) {
5656 vm_page_verify_this_free_list_enabled
= TRUE
;
5659 for (color
= 0; color
< vm_colors
; color
++) {
5660 npages
+= vm_page_verify_free_list(&vm_page_queue_free
[color
].qhead
,
5661 color
, VM_PAGE_NULL
, FALSE
);
5663 nlopages
= vm_page_verify_free_list(&vm_lopage_queue_free
,
5665 VM_PAGE_NULL
, FALSE
);
5666 if (npages
!= vm_page_free_count
|| nlopages
!= vm_lopage_free_count
) {
5667 panic("vm_page_verify_free_lists: "
5668 "npages %u free_count %d nlopages %u lo_free_count %u",
5669 npages
, vm_page_free_count
, nlopages
, vm_lopage_free_count
);
5672 if (toggle
== TRUE
) {
5673 vm_page_verify_this_free_list_enabled
= FALSE
;
5676 lck_mtx_unlock(&vm_page_queue_free_lock
);
5679 #endif /* MACH_ASSERT */
extern boolean_t(*volatile consider_buffer_cache_collect)(int);

/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define MAX_CONSIDERED_BEFORE_YIELD     1000
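/*
 * Shape of the scan described above, reduced to a sketch (illustrative
 * pseudocode only; the real loop below also handles yielding, wrap-around
 * and the second "steal" pass):
 *
 *	for (page_idx = last_idx; npages < contig_pages; page_idx++) {
 *		m = &vm_pages[page_idx];
 *		if (<m is not stealable> ||
 *		    VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
 *			RESET_STATE_OF_RUN();	// abandon run, maybe start a new one here
 *		} else {
 *			npages++;		// extend the current run
 *		}
 *		prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
 *	}
 */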
5730 #define RESET_STATE_OF_RUN() \
5732 prevcontaddr = -2; \
5734 free_considered = 0; \
5735 substitute_needed = 0; \
5740 * Can we steal in-use (i.e. not free) pages when searching for
5741 * physically-contiguous pages ?
5743 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5745 static unsigned int vm_page_find_contiguous_last_idx
= 0, vm_page_lomem_find_contiguous_last_idx
= 0;
5747 int vm_page_find_contig_debug
= 0;
5751 vm_page_find_contiguous(
5752 unsigned int contig_pages
,
5759 ppnum_t prevcontaddr
= 0;
5760 ppnum_t start_pnum
= 0;
5761 unsigned int npages
= 0, considered
= 0, scanned
= 0;
5762 unsigned int page_idx
= 0, start_idx
= 0, last_idx
= 0, orig_last_idx
= 0;
5763 unsigned int idx_last_contig_page_found
= 0;
5764 int free_considered
= 0, free_available
= 0;
5765 int substitute_needed
= 0;
5766 int zone_gc_called
= 0;
5770 clock_sec_t tv_start_sec
= 0, tv_end_sec
= 0;
5771 clock_usec_t tv_start_usec
= 0, tv_end_usec
= 0;
5776 int stolen_pages
= 0;
5777 int compressed_pages
= 0;
5780 if (contig_pages
== 0) {
5781 return VM_PAGE_NULL
;
5787 vm_page_verify_free_lists();
5790 clock_get_system_microtime(&tv_start_sec
, &tv_start_usec
);
5792 PAGE_REPLACEMENT_ALLOWED(TRUE
);
5795 * If there are still delayed pages, try to free up some that match.
5797 if (__improbable(vm_delayed_count
!= 0 && contig_pages
!= 0)) {
5798 vm_free_delayed_pages_contig(contig_pages
, max_pnum
, pnum_mask
);
5801 vm_page_lock_queues();
5802 lck_mtx_lock(&vm_page_queue_free_lock
);
5804 RESET_STATE_OF_RUN();
5808 free_available
= vm_page_free_count
- vm_page_free_reserved
;
5812 if (flags
& KMA_LOMEM
) {
5813 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
;
5815 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
;
5818 orig_last_idx
= idx_last_contig_page_found
;
5819 last_idx
= orig_last_idx
;
5821 for (page_idx
= last_idx
, start_idx
= last_idx
;
5822 npages
< contig_pages
&& page_idx
< vm_pages_count
;
5827 page_idx
>= orig_last_idx
) {
5829 * We're back where we started and we haven't
5830 * found any suitable contiguous range. Let's
5836 m
= &vm_pages
[page_idx
];
5838 assert(!m
->vmp_fictitious
);
5839 assert(!m
->vmp_private
);
5841 if (max_pnum
&& VM_PAGE_GET_PHYS_PAGE(m
) > max_pnum
) {
5842 /* no more low pages... */
5845 if (!npages
& ((VM_PAGE_GET_PHYS_PAGE(m
) & pnum_mask
) != 0)) {
5849 RESET_STATE_OF_RUN();
5850 } else if (VM_PAGE_WIRED(m
) || m
->vmp_gobbled
||
5851 m
->vmp_laundry
|| m
->vmp_wanted
||
5852 m
->vmp_cleaning
|| m
->vmp_overwriting
|| m
->vmp_free_when_done
) {
5854 * page is in a transient state
5855 * or a state we don't want to deal
5856 * with, so don't consider it which
5857 * means starting a new run
5859 RESET_STATE_OF_RUN();
5860 } else if ((m
->vmp_q_state
== VM_PAGE_NOT_ON_Q
) ||
5861 (m
->vmp_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
) ||
5862 (m
->vmp_q_state
== VM_PAGE_ON_FREE_LOPAGE_Q
) ||
5863 (m
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
5865 * page needs to be on one of our queues (other then the pageout or special free queues)
5866 * or it needs to belong to the compressor pool (which is now indicated
5867 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5868 * from the check for VM_PAGE_NOT_ON_Q)
5869 * in order for it to be stable behind the
5870 * locks we hold at this point...
5871 * if not, don't consider it which
5872 * means starting a new run
5874 RESET_STATE_OF_RUN();
5875 } else if ((m
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
) && (!m
->vmp_tabled
|| m
->vmp_busy
)) {
5877 * pages on the free list are always 'busy'
5878 * so we couldn't test for 'busy' in the check
5879 * for the transient states... pages that are
5880 * 'free' are never 'tabled', so we also couldn't
5881 * test for 'tabled'. So we check here to make
5882 * sure that a non-free page is not busy and is
5883 * tabled on an object...
5884 * if not, don't consider it which
5885 * means starting a new run
5887 RESET_STATE_OF_RUN();
5889 if (VM_PAGE_GET_PHYS_PAGE(m
) != prevcontaddr
+ 1) {
5890 if ((VM_PAGE_GET_PHYS_PAGE(m
) & pnum_mask
) != 0) {
5891 RESET_STATE_OF_RUN();
5895 start_idx
= page_idx
;
5896 start_pnum
= VM_PAGE_GET_PHYS_PAGE(m
);
5901 prevcontaddr
= VM_PAGE_GET_PHYS_PAGE(m
);
5904 if (m
->vmp_q_state
== VM_PAGE_ON_FREE_Q
) {
5908 * This page is not free.
5909 * If we can't steal used pages,
5910 * we have to give up this run
5912 * Otherwise, we might need to
5913 * move the contents of this page
5914 * into a substitute page.
5916 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5917 if (m
->vmp_pmapped
|| m
->vmp_dirty
|| m
->vmp_precious
) {
5918 substitute_needed
++;
5921 RESET_STATE_OF_RUN();
5925 if ((free_considered
+ substitute_needed
) > free_available
) {
5927 * if we let this run continue
5928 * we will end up dropping the vm_page_free_count
5929 * below the reserve limit... we need to abort
5930 * this run, but we can at least re-consider this
5931 * page... thus the jump back to 'retry'
5933 RESET_STATE_OF_RUN();
5935 if (free_available
&& considered
<= MAX_CONSIDERED_BEFORE_YIELD
) {
5940 * free_available == 0
5941 * so can't consider any free pages... if
5942 * we went to retry in this case, we'd
5943 * get stuck looking at the same page
5944 * w/o making any forward progress
5945 * we also want to take this path if we've already
5946 * reached our limit that controls the lock latency
5951 if (considered
> MAX_CONSIDERED_BEFORE_YIELD
&& npages
<= 1) {
5952 PAGE_REPLACEMENT_ALLOWED(FALSE
);
5954 lck_mtx_unlock(&vm_page_queue_free_lock
);
5955 vm_page_unlock_queues();
5959 PAGE_REPLACEMENT_ALLOWED(TRUE
);
5961 vm_page_lock_queues();
5962 lck_mtx_lock(&vm_page_queue_free_lock
);
5964 RESET_STATE_OF_RUN();
5966 * reset our free page limit since we
5967 * dropped the lock protecting the vm_page_free_queue
5969 free_available
= vm_page_free_count
- vm_page_free_reserved
;
5980 if (npages
!= contig_pages
) {
5983 * We didn't find a contiguous range but we didn't
5984 * start from the very first page.
5985 * Start again from the very first page.
5987 RESET_STATE_OF_RUN();
5988 if (flags
& KMA_LOMEM
) {
5989 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= 0;
5991 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= 0;
5994 page_idx
= last_idx
;
5998 lck_mtx_unlock(&vm_page_queue_free_lock
);
6002 unsigned int cur_idx
;
6003 unsigned int tmp_start_idx
;
6004 vm_object_t locked_object
= VM_OBJECT_NULL
;
6005 boolean_t abort_run
= FALSE
;
6007 assert(page_idx
- start_idx
== contig_pages
);
6009 tmp_start_idx
= start_idx
;
6012 * first pass through to pull the free pages
6013 * off of the free queue so that in case we
6014 * need substitute pages, we won't grab any
6015 * of the free pages in the run... we'll clear
6016 * the 'free' bit in the 2nd pass, and even in
6017 * an abort_run case, we'll collect all of the
6018 * free pages in this run and return them to the free list
6020 while (start_idx
< page_idx
) {
6021 m1
= &vm_pages
[start_idx
++];
6023 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
6024 assert(m1
->vmp_q_state
== VM_PAGE_ON_FREE_Q
);
6027 if (m1
->vmp_q_state
== VM_PAGE_ON_FREE_Q
) {
6030 color
= VM_PAGE_GET_COLOR(m1
);
6032 vm_page_verify_free_list(&vm_page_queue_free
[color
].qhead
, color
, m1
, TRUE
);
6034 vm_page_queue_remove(&vm_page_queue_free
[color
].qhead
, m1
, vmp_pageq
);
6036 VM_PAGE_ZERO_PAGEQ_ENTRY(m1
);
6038 vm_page_verify_free_list(&vm_page_queue_free
[color
].qhead
, color
, VM_PAGE_NULL
, FALSE
);
6041 * Clear the "free" bit so that this page
6042 * does not get considered for another
6043 * concurrent physically-contiguous allocation.
6045 m1
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
6046 assert(m1
->vmp_busy
);
6048 vm_page_free_count
--;
6051 if (flags
& KMA_LOMEM
) {
6052 vm_page_lomem_find_contiguous_last_idx
= page_idx
;
6054 vm_page_find_contiguous_last_idx
= page_idx
;
6058 * we can drop the free queue lock at this point since
6059 * we've pulled any 'free' candidates off of the list
6060 * we need it dropped so that we can do a vm_page_grab
6061 * when substituing for pmapped/dirty pages
6063 lck_mtx_unlock(&vm_page_queue_free_lock
);
6065 start_idx
= tmp_start_idx
;
6066 cur_idx
= page_idx
- 1;
6068 while (start_idx
++ < page_idx
) {
6070 * must go through the list from back to front
6071 * so that the page list is created in the
6072 * correct order - low -> high phys addresses
6074 m1
= &vm_pages
[cur_idx
--];
6076 if (m1
->vmp_object
== 0) {
6078 * page has already been removed from
6079 * the free list in the 1st pass
6081 assert(m1
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
6082 assert(m1
->vmp_offset
== (vm_object_offset_t
) -1);
6083 assert(m1
->vmp_busy
);
6084 assert(!m1
->vmp_wanted
);
6085 assert(!m1
->vmp_laundry
);
6089 boolean_t disconnected
, reusable
;
6091 if (abort_run
== TRUE
) {
6095 assert(m1
->vmp_q_state
!= VM_PAGE_NOT_ON_Q
);
6097 object
= VM_PAGE_OBJECT(m1
);
6099 if (object
!= locked_object
) {
6100 if (locked_object
) {
6101 vm_object_unlock(locked_object
);
6102 locked_object
= VM_OBJECT_NULL
;
6104 if (vm_object_lock_try(object
)) {
6105 locked_object
= object
;
6108 if (locked_object
== VM_OBJECT_NULL
||
6109 (VM_PAGE_WIRED(m1
) || m1
->vmp_gobbled
||
6110 m1
->vmp_laundry
|| m1
->vmp_wanted
||
6111 m1
->vmp_cleaning
|| m1
->vmp_overwriting
|| m1
->vmp_free_when_done
|| m1
->vmp_busy
) ||
6112 (m1
->vmp_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
6113 if (locked_object
) {
6114 vm_object_unlock(locked_object
);
6115 locked_object
= VM_OBJECT_NULL
;
6117 tmp_start_idx
= cur_idx
;
6122 disconnected
= FALSE
;
6125 if ((m1
->vmp_reusable
||
6126 object
->all_reusable
) &&
6127 (m1
->vmp_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
) &&
6129 !m1
->vmp_reference
) {
6130 /* reusable page... */
6131 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1
));
6132 disconnected
= TRUE
;
6135 * ... not reused: can steal
6136 * without relocating contents.
6142 if ((m1
->vmp_pmapped
&&
6146 vm_object_offset_t offset
;
6148 m2
= vm_page_grab_options(VM_PAGE_GRAB_Q_LOCK_HELD
);
6150 if (m2
== VM_PAGE_NULL
) {
6151 if (locked_object
) {
6152 vm_object_unlock(locked_object
);
6153 locked_object
= VM_OBJECT_NULL
;
6155 tmp_start_idx
= cur_idx
;
6159 if (!disconnected
) {
6160 if (m1
->vmp_pmapped
) {
6161 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1
));
6167 /* copy the page's contents */
6168 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1
), VM_PAGE_GET_PHYS_PAGE(m2
));
6169 /* copy the page's state */
6170 assert(!VM_PAGE_WIRED(m1
));
6171 assert(m1
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
);
6172 assert(m1
->vmp_q_state
!= VM_PAGE_ON_PAGEOUT_Q
);
6173 assert(!m1
->vmp_laundry
);
6174 m2
->vmp_reference
= m1
->vmp_reference
;
6175 assert(!m1
->vmp_gobbled
);
6176 assert(!m1
->vmp_private
);
6177 m2
->vmp_no_cache
= m1
->vmp_no_cache
;
6178 m2
->vmp_xpmapped
= 0;
6179 assert(!m1
->vmp_busy
);
6180 assert(!m1
->vmp_wanted
);
6181 assert(!m1
->vmp_fictitious
);
6182 m2
->vmp_pmapped
= m1
->vmp_pmapped
; /* should flush cache ? */
6183 m2
->vmp_wpmapped
= m1
->vmp_wpmapped
;
6184 assert(!m1
->vmp_free_when_done
);
6185 m2
->vmp_absent
= m1
->vmp_absent
;
6186 m2
->vmp_error
= m1
->vmp_error
;
6187 m2
->vmp_dirty
= m1
->vmp_dirty
;
6188 assert(!m1
->vmp_cleaning
);
6189 m2
->vmp_precious
= m1
->vmp_precious
;
6190 m2
->vmp_clustered
= m1
->vmp_clustered
;
6191 assert(!m1
->vmp_overwriting
);
6192 m2
->vmp_restart
= m1
->vmp_restart
;
6193 m2
->vmp_unusual
= m1
->vmp_unusual
;
6194 m2
->vmp_cs_validated
= m1
->vmp_cs_validated
;
6195 m2
->vmp_cs_tainted
= m1
->vmp_cs_tainted
;
6196 m2
->vmp_cs_nx
= m1
->vmp_cs_nx
;
6199 * If m1 had really been reusable,
6200 * we would have just stolen it, so
6201 * let's not propagate it's "reusable"
6202 * bit and assert that m2 is not
6203 * marked as "reusable".
6205 // m2->vmp_reusable = m1->vmp_reusable;
6206 assert(!m2
->vmp_reusable
);
6208 // assert(!m1->vmp_lopage);
6210 if (m1
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) {
6211 m2
->vmp_q_state
= VM_PAGE_USED_BY_COMPRESSOR
;
6215 * page may need to be flushed if
6216 * it is marshalled into a UPL
6217 * that is going to be used by a device
6218 * that doesn't support coherency
6220 m2
->vmp_written_by_kernel
= TRUE
;
6223 * make sure we clear the ref/mod state
6224 * from the pmap layer... else we risk
6225 * inheriting state from the last time
6226 * this page was used...
6228 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2
), VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
6230 if (refmod
& VM_MEM_REFERENCED
) {
6231 m2
->vmp_reference
= TRUE
;
6233 if (refmod
& VM_MEM_MODIFIED
) {
6234 SET_PAGE_DIRTY(m2
, TRUE
);
6236 offset
= m1
->vmp_offset
;
6239 * completely cleans up the state
6240 * of the page so that it is ready
6241 * to be put onto the free list, or
6242 * for this purpose it looks like it
6243 * just came off of the free list
6245 vm_page_free_prepare(m1
);
6248 * now put the substitute page
6251 vm_page_insert_internal(m2
, locked_object
, offset
, VM_KERN_MEMORY_NONE
, TRUE
, TRUE
, FALSE
, FALSE
, NULL
);
6253 if (m2
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) {
6254 m2
->vmp_pmapped
= TRUE
;
6255 m2
->vmp_wpmapped
= TRUE
;
6257 PMAP_ENTER(kernel_pmap
, (vm_map_offset_t
)m2
->vmp_offset
, m2
,
6258 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, 0, TRUE
, kr
);
6260 assert(kr
== KERN_SUCCESS
);
6264 if (m2
->vmp_reference
) {
6265 vm_page_activate(m2
);
6267 vm_page_deactivate(m2
);
6270 PAGE_WAKEUP_DONE(m2
);
6272 assert(m1
->vmp_q_state
!= VM_PAGE_USED_BY_COMPRESSOR
);
6275 * completely cleans up the state
6276 * of the page so that it is ready
6277 * to be put onto the free list, or
6278 * for this purpose it looks like it
6279 * just came off of the free list
6281 vm_page_free_prepare(m1
);
6286 #if CONFIG_BACKGROUND_QUEUE
6287 vm_page_assign_background_state(m1
);
6289 VM_PAGE_ZERO_PAGEQ_ENTRY(m1
);
6293 if (locked_object
) {
6294 vm_object_unlock(locked_object
);
6295 locked_object
= VM_OBJECT_NULL
;
6298 if (abort_run
== TRUE
) {
6300 * want the index of the last
6301 * page in this run that was
6302 * successfully 'stolen', so back
6303 * it up 1 for the auto-decrement on use
6304 * and 1 more to bump back over this page
6306 page_idx
= tmp_start_idx
+ 2;
6307 if (page_idx
>= vm_pages_count
) {
6309 if (m
!= VM_PAGE_NULL
) {
6310 vm_page_unlock_queues();
6311 vm_page_free_list(m
, FALSE
);
6312 vm_page_lock_queues();
6318 page_idx
= last_idx
= 0;
6324 * We didn't find a contiguous range but we didn't
6325 * start from the very first page.
6326 * Start again from the very first page.
6328 RESET_STATE_OF_RUN();
6330 if (flags
& KMA_LOMEM
) {
6331 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
6333 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
6336 last_idx
= page_idx
;
6338 if (m
!= VM_PAGE_NULL
) {
6339 vm_page_unlock_queues();
6340 vm_page_free_list(m
, FALSE
);
6341 vm_page_lock_queues();
6346 lck_mtx_lock(&vm_page_queue_free_lock
);
6348 * reset our free page limit since we
6349 * dropped the lock protecting the vm_page_free_queue
6351 free_available
= vm_page_free_count
- vm_page_free_reserved
;
6355 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
6356 assert(m1
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
6357 assert(m1
->vmp_wire_count
== 0);
6360 m1
->vmp_wire_count
++;
6361 m1
->vmp_q_state
= VM_PAGE_IS_WIRED
;
6363 m1
->vmp_gobbled
= TRUE
;
6366 if (wire
== FALSE
) {
6367 vm_page_gobble_count
+= npages
;
6371 * gobbled pages are also counted as wired pages
6373 vm_page_wire_count
+= npages
;
6375 assert(vm_page_verify_contiguous(m
, npages
));
6378 PAGE_REPLACEMENT_ALLOWED(FALSE
);
6380 vm_page_unlock_queues();
6383 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
6385 tv_end_sec
-= tv_start_sec
;
6386 if (tv_end_usec
< tv_start_usec
) {
6388 tv_end_usec
+= 1000000;
6390 tv_end_usec
-= tv_start_usec
;
6391 if (tv_end_usec
>= 1000000) {
6393 tv_end_sec
-= 1000000;
6395 if (vm_page_find_contig_debug
) {
6396 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
6397 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
6398 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
6399 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
);
6404 vm_page_verify_free_lists();
6406 if (m
== NULL
&& zone_gc_called
< 2) {
6407 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
6408 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
6409 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
, vm_page_wire_count
);
6411 if (consider_buffer_cache_collect
!= NULL
) {
6412 (void)(*consider_buffer_cache_collect
)(1);
6415 zone_gc(zone_gc_called
? ZONE_GC_DRAIN
: ZONE_GC_TRIM
);
6419 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count
);
6420 goto full_scan_again
;
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
    vm_size_t       size,
    vm_page_t       *list,
    ppnum_t         max_pnum,
    ppnum_t         pnum_mask,
    boolean_t       wire,
    int             flags)
{
    vm_page_t       pages;
    unsigned int    npages;

    if (size % PAGE_SIZE != 0) {
        return KERN_INVALID_ARGUMENT;
    }

    npages = (unsigned int) (size / PAGE_SIZE);
    if (npages != size / PAGE_SIZE) {
        /* 32-bit overflow */
        return KERN_INVALID_ARGUMENT;
    }

    /*
     *	Obtain a pointer to a subset of the free
     *	list large enough to satisfy the request;
     *	the region will be physically contiguous.
     */
    pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

    if (pages == VM_PAGE_NULL) {
        return KERN_NO_SPACE;
    }
    /*
     * determine need for wakeups
     */
    if (vm_page_free_count < vm_page_free_min) {
        lck_mtx_lock(&vm_page_queue_free_lock);
        if (vm_pageout_running == FALSE) {
            lck_mtx_unlock(&vm_page_queue_free_lock);
            thread_wakeup((event_t) &vm_page_free_wanted);
        } else {
            lck_mtx_unlock(&vm_page_queue_free_lock);
        }
    }

    VM_CHECK_MEMORYSTATUS;

    /*
     *	The CPM pages should now be available and
     *	ordered by ascending physical address.
     */
    assert(vm_page_verify_contiguous(pages, npages));

    *list = pages;

    return KERN_SUCCESS;
}
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the code.
 */
void
vm_page_do_delayed_work(
    vm_object_t object,
    vm_tag_t tag,
    struct vm_page_delayed_work *dwp,
    int dw_count)
{
    int         j;
    vm_page_t   m;
    vm_page_t   local_free_q = VM_PAGE_NULL;

    /*
     * pageout_scan takes the vm_page_lock_queues first
     * then tries for the object lock... to avoid what
     * is effectively a lock inversion, we'll go to the
     * trouble of taking them in that same order... otherwise
     * if this object contains the majority of the pages resident
     * in the UBC (or a small set of large objects actively being
     * worked on contain the majority of the pages), we could
     * cause the pageout_scan thread to 'starve' in its attempt
     * to find pages to move to the free queue, since it has to
     * successfully acquire the object lock of any candidate page
     * before it can steal/clean it.
     */
    if (!vm_page_trylockspin_queues()) {
        vm_object_unlock(object);

        /*
         * "Turnstile enabled vm_pageout_scan" can be runnable
         * for a very long time without getting on a core.
         * If this is a higher priority thread it could be
         * waiting here for a very long time respecting the fact
         * that pageout_scan would like its object after VPS does
         * its work.
         *
         * So we cap the number of yields in the vm_object_lock_avoid()
         * case to a single mutex_pause(0) which will give vm_pageout_scan
         * 10us to run and grab the object if needed.
         */
        vm_page_lockspin_queues();

        for (j = 0;; j++) {
            if ((!vm_object_lock_avoid(object) ||
                (vps_dynamic_priority_enabled && (j > 0))) &&
                _vm_object_lock_try(object)) {
                break;
            }
            vm_page_unlock_queues();
            mutex_pause(j);
            vm_page_lockspin_queues();
        }
    }
    for (j = 0; j < dw_count; j++, dwp++) {
        m = dwp->dw_m;

        if (dwp->dw_mask & DW_vm_pageout_throttle_up) {
            vm_pageout_throttle_up(m);
        }
#if CONFIG_PHANTOM_CACHE
        if (dwp->dw_mask & DW_vm_phantom_cache_update) {
            vm_phantom_cache_update(m);
        }
#endif
        if (dwp->dw_mask & DW_vm_page_wire) {
            vm_page_wire(m, tag, FALSE);
        } else if (dwp->dw_mask & DW_vm_page_unwire) {
            boolean_t queueit;

            queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

            vm_page_unwire(m, queueit);
        }
        if (dwp->dw_mask & DW_vm_page_free) {
            vm_page_free_prepare_queues(m);

            assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
            /*
             * Add this page to our list of reclaimed pages,
             * to be freed later.
             */
            m->vmp_snext = local_free_q;
            local_free_q = m;
        } else {
            if (dwp->dw_mask & DW_vm_page_deactivate_internal) {
                vm_page_deactivate_internal(m, FALSE);
            } else if (dwp->dw_mask & DW_vm_page_activate) {
                if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
                    vm_page_activate(m);
                }
            } else if (dwp->dw_mask & DW_vm_page_speculate) {
                vm_page_speculate(m, TRUE);
            } else if (dwp->dw_mask & DW_enqueue_cleaned) {
                /*
                 * if we didn't hold the object lock and did this,
                 * we might disconnect the page, then someone might
                 * soft fault it back in, then we would put it on the
                 * cleaned queue, and so we would have a referenced (maybe even dirty)
                 * page on that queue, which we don't want
                 */
                int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

                if ((refmod_state & VM_MEM_REFERENCED)) {
                    /*
                     * this page has been touched since it got cleaned; let's activate it
                     * if it hasn't already been
                     */
                    VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
                    VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);

                    if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
                        vm_page_activate(m);
                    }
                } else {
                    m->vmp_reference = FALSE;
                    vm_page_enqueue_cleaned(m);
                }
            } else if (dwp->dw_mask & DW_vm_page_lru) {
                vm_page_lru(m);
            } else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
                if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
                    vm_page_queues_remove(m, TRUE);
                }
            }
            if (dwp->dw_mask & DW_set_reference) {
                m->vmp_reference = TRUE;
            } else if (dwp->dw_mask & DW_clear_reference) {
                m->vmp_reference = FALSE;
            }

            if (dwp->dw_mask & DW_move_page) {
                if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
                    vm_page_queues_remove(m, FALSE);

                    assert(VM_PAGE_OBJECT(m) != kernel_object);

                    vm_page_enqueue_inactive(m, FALSE);
                }
            }
            if (dwp->dw_mask & DW_clear_busy) {
                m->vmp_busy = FALSE;
            }
            if (dwp->dw_mask & DW_PAGE_WAKEUP) {
                PAGE_WAKEUP(m);
            }
        }
    }
    vm_page_unlock_queues();

    if (local_free_q) {
        vm_page_free_list(local_free_q, TRUE);
    }

    VM_CHECK_MEMORYSTATUS;
}
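
/*
 * The block below is an illustrative, self-contained sketch of the two-pass
 * batching pattern described in the comment above vm_page_do_delayed_work():
 * do the lock-free per-page work first while recording deferred operations,
 * then take the hot queue lock once per batch.  A pthread mutex stands in for
 * vm_page_queue_lock and every 'sketch_' name is hypothetical, not kernel API.
 */
#if 0
#include <pthread.h>
#include <stdio.h>

#define SKETCH_DELAYED_WORK_LIMIT 8

static pthread_mutex_t sketch_queue_lock = PTHREAD_MUTEX_INITIALIZER;

struct sketch_dw {
    int page_id;    /* which page the deferred operation applies to */
    int dw_mask;    /* which queue operation was requested */
};

static void
sketch_apply_batch(struct sketch_dw *dwp, int dw_count)
{
    /* second pass: take the hot lock once for the whole batch */
    pthread_mutex_lock(&sketch_queue_lock);
    for (int j = 0; j < dw_count; j++, dwp++) {
        printf("page %d: op 0x%x\n", dwp->page_id, dwp->dw_mask);
    }
    pthread_mutex_unlock(&sketch_queue_lock);
}

int
main(void)
{
    struct sketch_dw dw_array[SKETCH_DELAYED_WORK_LIMIT];
    int dw_count = 0;

    /*
     * first pass: per-page work that needs no queue lock, only recording
     * what must later happen under the lock
     */
    for (int page = 0; page < 20; page++) {
        dw_array[dw_count].page_id = page;
        dw_array[dw_count].dw_mask = 0x1;   /* e.g. "deactivate" */
        if (++dw_count == SKETCH_DELAYED_WORK_LIMIT) {
            sketch_apply_batch(dw_array, dw_count);
            dw_count = 0;
        }
    }
    if (dw_count) {
        sketch_apply_batch(dw_array, dw_count);
    }
    return 0;
}
#endif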
kern_return_t
vm_page_alloc_list(
    int         page_count,
    int         flags,
    vm_page_t   *list)
{
    vm_page_t       mem;
    vm_page_t       page_list = VM_PAGE_NULL;
    kern_return_t   kr = KERN_SUCCESS;
    int             page_grab_count = 0;
    mach_vm_size_t  map_size = ptoa_64(page_count);
#if DEVELOPMENT || DEBUG
    task_t          task = current_task();
#endif /* DEVELOPMENT || DEBUG */

    for (int i = 0; i < page_count; i++) {
        for (;;) {
            if (flags & KMA_LOMEM) {
                mem = vm_page_grablo();
            } else {
                mem = vm_page_grab();
            }

            if (mem != VM_PAGE_NULL) {
                break;
            }

            if (flags & KMA_NOPAGEWAIT) {
                kr = KERN_RESOURCE_SHORTAGE;
                goto out;
            }
            if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
                kr = KERN_RESOURCE_SHORTAGE;
                goto out;
            }

            /* VM privileged threads should have waited in vm_page_grab() and not get here. */
            assert(!(current_thread()->options & TH_OPT_VMPRIV));

            uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
            if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
                kr = KERN_RESOURCE_SHORTAGE;
                goto out;
            }
            VM_PAGE_WAIT();
        }
        page_grab_count++;
        mem->vmp_snext = page_list;
        page_list = mem;
    }

    if (KMA_ZERO & flags) {
        for (mem = page_list; mem; mem = mem->vmp_snext) {
            vm_page_zero_fill(mem);
        }
    }

out:
#if DEBUG || DEVELOPMENT
    if (task != NULL) {
        ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
    }
#endif

    if (kr == KERN_SUCCESS) {
        *list = page_list;
    } else {
        vm_page_free_list(page_list, FALSE);
    }

    return kr;
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
    page->vmp_offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
    return page->vmp_snext;
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
    return page->vmp_offset;
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
    return VM_PAGE_GET_PHYS_PAGE(page);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(vm_page_queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void        hibernate_free_range(int, int);
void        hibernate_hash_insert_page(vm_page_t);
uint32_t    hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
uint32_t    hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t     hibernate_lookup_paddr(unsigned int);
struct hibernate_statistics {
    int hibernate_considered;
    int hibernate_reentered_on_q;
    int hibernate_found_dirty;
    int hibernate_skipped_cleaning;
    int hibernate_skipped_transient;
    int hibernate_skipped_precious;
    int hibernate_skipped_external;
    int hibernate_queue_nolock;
    int hibernate_queue_paused;
    int hibernate_throttled;
    int hibernate_throttle_timeout;
    int hibernate_drained;
    int hibernate_drain_timeout;
    int cd_found_precious;
    int cd_found_unusual;
    int cd_found_cleaning;
    int cd_found_laundry;
    int cd_found_xpmapped;
    int cd_skipped_xpmapped;
    int cd_vm_page_wire_count;
    int cd_vm_struct_pages_unneeded;
} hibernate_stats;
/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 *
 * We use a size value instead of pages because we don't want to take up more space
 * on disk if the system has a 16K page size vs 4K. Also, we are not guaranteed
 * to have that additional space available.
 *
 * Since this was set at 40000 pages on X86 we are going to use 160MB as our
 * upper bound.
 */
#define HIBERNATE_XPMAPPED_LIMIT        ((160 * 1024 * 1024ULL) / PAGE_SIZE)
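
/*
 * Quick arithmetic sketch of the clamp above: expressing the limit in bytes
 * (160MB) keeps the on-disk budget identical for 4K and 16K page sizes.
 * Standalone illustration only; the page sizes are assumed, not taken from
 * this build.
 */
#if 0
#include <stdio.h>

int
main(void)
{
    unsigned long long budget = 160ULL * 1024 * 1024;

    printf("4K pages:  limit = %llu pages\n", budget / 4096);   /* 40960 */
    printf("16K pages: limit = %llu pages\n", budget / 16384);  /* 10240 */
    return 0;
}
#endif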
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
    wait_result_t wait_result;

    vm_page_lock_queues();

    while (!vm_page_queue_empty(&q->pgo_pending)) {
        q->pgo_draining = TRUE;

        assert_wait_timeout((event_t) (&q->pgo_laundry + 1), THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);

        vm_page_unlock_queues();

        wait_result = thread_block(THREAD_CONTINUE_NULL);

        if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
            hibernate_stats.hibernate_drain_timeout++;

            if (q == &vm_pageout_queue_external) {
                return 1;
            }
            return 0;
        }
        vm_page_lock_queues();

        hibernate_stats.hibernate_drained++;
    }
    vm_page_unlock_queues();

    return 0;
}
6866 boolean_t hibernate_skip_external
= FALSE
;
6869 hibernate_flush_queue(vm_page_queue_head_t
*q
, int qcount
)
6872 vm_object_t l_object
= NULL
;
6873 vm_object_t m_object
= NULL
;
6874 int refmod_state
= 0;
6875 int try_failed_count
= 0;
6877 int current_run
= 0;
6878 struct vm_pageout_queue
*iq
;
6879 struct vm_pageout_queue
*eq
;
6880 struct vm_pageout_queue
*tq
;
6882 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_START
,
6883 VM_KERNEL_UNSLIDE_OR_PERM(q
), qcount
);
6885 iq
= &vm_pageout_queue_internal
;
6886 eq
= &vm_pageout_queue_external
;
6888 vm_page_lock_queues();
6890 while (qcount
&& !vm_page_queue_empty(q
)) {
6891 if (current_run
++ == 1000) {
6892 if (hibernate_should_abort()) {
6899 m
= (vm_page_t
) vm_page_queue_first(q
);
6900 m_object
= VM_PAGE_OBJECT(m
);
6903 * check to see if we currently are working
6904 * with the same object... if so, we've
6905 * already got the lock
6907 if (m_object
!= l_object
) {
6909 * the object associated with candidate page is
6910 * different from the one we were just working
6911 * with... dump the lock if we still own it
6913 if (l_object
!= NULL
) {
6914 vm_object_unlock(l_object
);
6918 * Try to lock object; since we've alread got the
6919 * page queues lock, we can only 'try' for this one.
6920 * if the 'try' fails, we need to do a mutex_pause
6921 * to allow the owner of the object lock a chance to
6924 if (!vm_object_lock_try_scan(m_object
)) {
6925 if (try_failed_count
> 20) {
6926 hibernate_stats
.hibernate_queue_nolock
++;
6928 goto reenter_pg_on_q
;
6931 vm_page_unlock_queues();
6932 mutex_pause(try_failed_count
++);
6933 vm_page_lock_queues();
6935 hibernate_stats
.hibernate_queue_paused
++;
6938 l_object
= m_object
;
6941 if (!m_object
->alive
|| m
->vmp_cleaning
|| m
->vmp_laundry
|| m
->vmp_busy
|| m
->vmp_absent
|| m
->vmp_error
) {
6943 * page is not to be cleaned
6944 * put it back on the head of its queue
6946 if (m
->vmp_cleaning
) {
6947 hibernate_stats
.hibernate_skipped_cleaning
++;
6949 hibernate_stats
.hibernate_skipped_transient
++;
6952 goto reenter_pg_on_q
;
6954 if (m_object
->copy
== VM_OBJECT_NULL
) {
6955 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
|| m_object
->purgable
== VM_PURGABLE_EMPTY
) {
6957 * let the normal hibernate image path
6960 goto reenter_pg_on_q
;
6963 if (!m
->vmp_dirty
&& m
->vmp_pmapped
) {
6964 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
6966 if ((refmod_state
& VM_MEM_MODIFIED
)) {
6967 SET_PAGE_DIRTY(m
, FALSE
);
6973 if (!m
->vmp_dirty
) {
6975 * page is not to be cleaned
6976 * put it back on the head of its queue
6978 if (m
->vmp_precious
) {
6979 hibernate_stats
.hibernate_skipped_precious
++;
6982 goto reenter_pg_on_q
;
6985 if (hibernate_skip_external
== TRUE
&& !m_object
->internal
) {
6986 hibernate_stats
.hibernate_skipped_external
++;
6988 goto reenter_pg_on_q
;
6992 if (m_object
->internal
) {
6993 if (VM_PAGE_Q_THROTTLED(iq
)) {
6996 } else if (VM_PAGE_Q_THROTTLED(eq
)) {
7001 wait_result_t wait_result
;
7004 if (l_object
!= NULL
) {
7005 vm_object_unlock(l_object
);
7009 while (retval
== 0) {
7010 tq
->pgo_throttled
= TRUE
;
7012 assert_wait_timeout((event_t
) &tq
->pgo_laundry
, THREAD_INTERRUPTIBLE
, 1000, 1000 * NSEC_PER_USEC
);
7014 vm_page_unlock_queues();
7016 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
7018 vm_page_lock_queues();
7020 if (wait_result
!= THREAD_TIMED_OUT
) {
7023 if (!VM_PAGE_Q_THROTTLED(tq
)) {
7027 if (hibernate_should_abort()) {
7031 if (--wait_count
== 0) {
7032 hibernate_stats
.hibernate_throttle_timeout
++;
7035 hibernate_skip_external
= TRUE
;
7045 hibernate_stats
.hibernate_throttled
++;
7050 * we've already factored out pages in the laundry which
7051 * means this page can't be on the pageout queue so it's
7052 * safe to do the vm_page_queues_remove
7054 vm_page_queues_remove(m
, TRUE
);
7056 if (m_object
->internal
== TRUE
) {
7057 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m
), PMAP_OPTIONS_COMPRESSOR
, NULL
);
7060 vm_pageout_cluster(m
);
7062 hibernate_stats
.hibernate_found_dirty
++;
7067 vm_page_queue_remove(q
, m
, vmp_pageq
);
7068 vm_page_queue_enter(q
, m
, vmp_pageq
);
7070 hibernate_stats
.hibernate_reentered_on_q
++;
7072 hibernate_stats
.hibernate_considered
++;
7075 try_failed_count
= 0;
7077 if (l_object
!= NULL
) {
7078 vm_object_unlock(l_object
);
7082 vm_page_unlock_queues();
7084 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_END
, hibernate_stats
.hibernate_found_dirty
, retval
, 0, 0, 0);
static int
hibernate_flush_dirty_pages(int pass)
{
    struct vm_speculative_age_q *aq;
    uint32_t    i;

    if (vm_page_local_q) {
        zpercpu_foreach_cpu(lid) {
            vm_page_reactivate_local(lid, TRUE, FALSE);
        }
    }

    for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
        int         qcount;
        vm_page_t   m;

        aq = &vm_page_queue_speculative[i];

        if (vm_page_queue_empty(&aq->age_q)) {
            continue;
        }
        qcount = 0;

        vm_page_lockspin_queues();

        vm_page_queue_iterate(&aq->age_q, m, vmp_pageq) {
            qcount++;
        }
        vm_page_unlock_queues();

        if (hibernate_flush_queue(&aq->age_q, qcount)) {
            return 1;
        }
    }
    if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count)) {
        return 1;
    }
    /* XXX FBDP TODO: flush secluded queue */
    if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count)) {
        return 1;
    }
    if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count)) {
        return 1;
    }
    if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
        return 1;
    }

    if (pass == 1) {
        vm_compressor_record_warmup_start();
    }

    if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
        if (pass == 1) {
            vm_compressor_record_warmup_end();
        }
        return 1;
    }
    if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
        if (pass == 1) {
            vm_compressor_record_warmup_end();
        }
        return 1;
    }
    if (pass == 1) {
        vm_compressor_record_warmup_end();
    }

    if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external)) {
        return 1;
    }

    return 0;
}
7169 hibernate_reset_stats()
7171 bzero(&hibernate_stats
, sizeof(struct hibernate_statistics
));
7176 hibernate_flush_memory()
7180 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
7182 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 3) | DBG_FUNC_START
, vm_page_free_count
, 0, 0, 0, 0);
7184 hibernate_cleaning_in_progress
= TRUE
;
7185 hibernate_skip_external
= FALSE
;
7187 if ((retval
= hibernate_flush_dirty_pages(1)) == 0) {
7188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 10) | DBG_FUNC_START
, VM_PAGE_COMPRESSOR_COUNT
, 0, 0, 0, 0);
7190 vm_compressor_flush();
7192 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 10) | DBG_FUNC_END
, VM_PAGE_COMPRESSOR_COUNT
, 0, 0, 0, 0);
7194 if (consider_buffer_cache_collect
!= NULL
) {
7195 unsigned int orig_wire_count
;
7197 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 7) | DBG_FUNC_START
, 0, 0, 0, 0, 0);
7198 orig_wire_count
= vm_page_wire_count
;
7200 (void)(*consider_buffer_cache_collect
)(1);
7201 zone_gc(ZONE_GC_DRAIN
);
7203 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count
- vm_page_wire_count
);
7205 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 7) | DBG_FUNC_END
, orig_wire_count
- vm_page_wire_count
, 0, 0, 0, 0);
7208 hibernate_cleaning_in_progress
= FALSE
;
7210 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 3) | DBG_FUNC_END
, vm_page_free_count
, hibernate_stats
.hibernate_found_dirty
, retval
, 0, 0);
7213 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT
);
7217 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
7218 hibernate_stats
.hibernate_considered
,
7219 hibernate_stats
.hibernate_reentered_on_q
,
7220 hibernate_stats
.hibernate_found_dirty
);
7221 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
7222 hibernate_stats
.hibernate_skipped_cleaning
,
7223 hibernate_stats
.hibernate_skipped_transient
,
7224 hibernate_stats
.hibernate_skipped_precious
,
7225 hibernate_stats
.hibernate_skipped_external
,
7226 hibernate_stats
.hibernate_queue_nolock
);
7227 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
7228 hibernate_stats
.hibernate_queue_paused
,
7229 hibernate_stats
.hibernate_throttled
,
7230 hibernate_stats
.hibernate_throttle_timeout
,
7231 hibernate_stats
.hibernate_drained
,
7232 hibernate_stats
.hibernate_drain_timeout
);
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t            bank;
    hibernate_bitmap_t  *bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++) {
        uint32_t last_bit;

        bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
        // set out-of-bound bits at end of bitmap.
        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
        if (last_bit) {
            bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
        }

        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
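
/*
 * Illustrative sketch of the 'out-of-bound bits' step above: the final 32-bit
 * word of a bank usually covers a few page slots past last_page, and setting
 * those trailing bits keeps them from ever reading as pages that still need
 * saving (a zero bit means "save this page").  The values are invented.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint32_t first_page = 0x1000;
    uint32_t last_page  = 0x1044;   /* 69 pages -> 3 words, 5 bits used in the last one */
    uint32_t last_bit   = ((last_page - first_page + 1) & 31);
    uint32_t last_word  = last_bit ? (0xFFFFFFFF >> last_bit) : 0;

    printf("bits used in final word: %u, out-of-bound fill pattern: 0x%08x\n",
        last_bit, last_word);
    return 0;
}
#endif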
void
hibernate_free_gobble_pages(void)
{
    vm_page_t m, next;
    uint32_t  count = 0;

    m = (vm_page_t) hibernate_gobble_queue;
    while (m) {
        next = m->vmp_snext;
        vm_page_free(m);
        count++;
        m = next;
    }
    hibernate_gobble_queue = VM_PAGE_NULL;

    if (count) {
        HIBLOG("Freed %d pages\n", count);
    }
}
7280 hibernate_consider_discard(vm_page_t m
, boolean_t preflight
)
7282 vm_object_t object
= NULL
;
7284 boolean_t discard
= FALSE
;
7287 if (m
->vmp_private
) {
7288 panic("hibernate_consider_discard: private");
7291 object
= VM_PAGE_OBJECT(m
);
7293 if (!vm_object_lock_try(object
)) {
7296 hibernate_stats
.cd_lock_failed
++;
7300 if (VM_PAGE_WIRED(m
)) {
7302 hibernate_stats
.cd_found_wired
++;
7306 if (m
->vmp_precious
) {
7308 hibernate_stats
.cd_found_precious
++;
7312 if (m
->vmp_busy
|| !object
->alive
) {
7314 * Somebody is playing with this page.
7317 hibernate_stats
.cd_found_busy
++;
7321 if (m
->vmp_absent
|| m
->vmp_unusual
|| m
->vmp_error
) {
7323 * If it's unusual in anyway, ignore it
7326 hibernate_stats
.cd_found_unusual
++;
7330 if (m
->vmp_cleaning
) {
7332 hibernate_stats
.cd_found_cleaning
++;
7336 if (m
->vmp_laundry
) {
7338 hibernate_stats
.cd_found_laundry
++;
7342 if (!m
->vmp_dirty
) {
7343 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
7345 if (refmod_state
& VM_MEM_REFERENCED
) {
7346 m
->vmp_reference
= TRUE
;
7348 if (refmod_state
& VM_MEM_MODIFIED
) {
7349 SET_PAGE_DIRTY(m
, FALSE
);
7354 * If it's clean or purgeable we can discard the page on wakeup.
7356 discard
= (!m
->vmp_dirty
)
7357 || (VM_PURGABLE_VOLATILE
== object
->purgable
)
7358 || (VM_PURGABLE_EMPTY
== object
->purgable
);
7361 if (discard
== FALSE
) {
7363 hibernate_stats
.cd_found_dirty
++;
7365 } else if (m
->vmp_xpmapped
&& m
->vmp_reference
&& !object
->internal
) {
7366 if (hibernate_stats
.cd_found_xpmapped
< HIBERNATE_XPMAPPED_LIMIT
) {
7368 hibernate_stats
.cd_found_xpmapped
++;
7373 hibernate_stats
.cd_skipped_xpmapped
++;
7380 vm_object_unlock(object
);
7388 hibernate_discard_page(vm_page_t m
)
7390 vm_object_t m_object
;
7392 if (m
->vmp_absent
|| m
->vmp_unusual
|| m
->vmp_error
) {
7394 * If it's unusual in anyway, ignore
7399 m_object
= VM_PAGE_OBJECT(m
);
7401 #if MACH_ASSERT || DEBUG
7402 if (!vm_object_lock_try(m_object
)) {
7403 panic("hibernate_discard_page(%p) !vm_object_lock_try", m
);
7406 /* No need to lock page queue for token delete, hibernate_vm_unlock()
7407 * makes sure these locks are uncontended before sleep */
7408 #endif /* MACH_ASSERT || DEBUG */
7410 if (m
->vmp_pmapped
== TRUE
) {
7411 __unused
int refmod_state
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
7414 if (m
->vmp_laundry
) {
7415 panic("hibernate_discard_page(%p) laundry", m
);
7417 if (m
->vmp_private
) {
7418 panic("hibernate_discard_page(%p) private", m
);
7420 if (m
->vmp_fictitious
) {
7421 panic("hibernate_discard_page(%p) fictitious", m
);
7424 if (VM_PURGABLE_VOLATILE
== m_object
->purgable
) {
7425 /* object should be on a queue */
7426 assert((m_object
->objq
.next
!= NULL
) && (m_object
->objq
.prev
!= NULL
));
7427 purgeable_q_t old_queue
= vm_purgeable_object_remove(m_object
);
7429 if (m_object
->purgeable_when_ripe
) {
7430 vm_purgeable_token_delete_first(old_queue
);
7432 vm_object_lock_assert_exclusive(m_object
);
7433 m_object
->purgable
= VM_PURGABLE_EMPTY
;
7436 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
7437 * accounted in the "volatile" ledger, so no change here.
7438 * We have to update vm_page_purgeable_count, though, since we're
7439 * effectively purging this object.
7442 assert(m_object
->resident_page_count
>= m_object
->wired_page_count
);
7443 delta
= (m_object
->resident_page_count
- m_object
->wired_page_count
);
7444 assert(vm_page_purgeable_count
>= delta
);
7446 OSAddAtomic(-delta
, (SInt32
*)&vm_page_purgeable_count
);
7451 #if MACH_ASSERT || DEBUG
7452 vm_object_unlock(m_object
);
7453 #endif /* MACH_ASSERT || DEBUG */
/*
 * Grab locks for hibernate_page_list_setall()
 */
void
hibernate_vm_lock_queues(void)
{
    vm_object_lock(compressor_object);
    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);
    lck_mtx_lock(&vm_purgeable_queue_lock);

    if (vm_page_local_q) {
        zpercpu_foreach(lq, vm_page_local_q) {
            VPL_LOCK(&lq->vpl_lock);
        }
    }
}

void
hibernate_vm_unlock_queues(void)
{
    if (vm_page_local_q) {
        zpercpu_foreach(lq, vm_page_local_q) {
            VPL_UNLOCK(&lq->vpl_lock);
        }
    }
    lck_mtx_unlock(&vm_purgeable_queue_lock);
    lck_mtx_unlock(&vm_page_queue_free_lock);
    vm_page_unlock_queues();
    vm_object_unlock(compressor_object);
}
/*
 * Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 * pages known to VM to not need saving are subtracted.
 * Wired pages to be saved are present in page_list_wired, pageable in page_list.
 */
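
/*
 * Minimal sketch of the bitmap convention documented above, using one 8-bit
 * "bank": a zero bit means "this page must be saved".  page_list_wired keeps
 * a zero bit only for wired pages, so OR-ing page_list with its complement
 * (as the setall code does when it pulls wired pages out of the pageable
 * bitmap) leaves wired pages to the wired pass only.  The bit patterns below
 * are made up for illustration.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint8_t page_list       = 0x81;  /* pages 1..6 still need saving        */
    uint8_t page_list_wired = 0xE7;  /* pages 3 and 4 are wired (zero bits)  */

    /* pull the wired pages out of the pageable bitmap */
    page_list |= (uint8_t)~page_list_wired;

    printf("pageable-save bitmap now 0x%02x (zero bits = save in pageable pass)\n",
        page_list);
    printf("wired-save bitmap stays 0x%02x (zero bits = save in wired pass)\n",
        page_list_wired);
    return 0;
}
#endif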
7495 hibernate_page_list_setall(hibernate_page_list_t
* page_list
,
7496 hibernate_page_list_t
* page_list_wired
,
7497 hibernate_page_list_t
* page_list_pal
,
7498 boolean_t preflight
,
7499 boolean_t will_discard
,
7500 uint32_t * pagesOut
)
7502 uint64_t start
, end
, nsec
;
7505 uint32_t pages
= page_list
->page_count
;
7506 uint32_t count_anonymous
= 0, count_throttled
= 0, count_compressor
= 0;
7507 uint32_t count_inactive
= 0, count_active
= 0, count_speculative
= 0, count_cleaned
= 0;
7508 uint32_t count_wire
= pages
;
7509 uint32_t count_discard_active
= 0;
7510 uint32_t count_discard_inactive
= 0;
7511 uint32_t count_discard_cleaned
= 0;
7512 uint32_t count_discard_purgeable
= 0;
7513 uint32_t count_discard_speculative
= 0;
7514 uint32_t count_discard_vm_struct_pages
= 0;
7517 hibernate_bitmap_t
* bitmap
;
7518 hibernate_bitmap_t
* bitmap_wired
;
7519 boolean_t discard_all
;
7522 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight
);
7526 page_list_wired
= NULL
;
7527 page_list_pal
= NULL
;
7528 discard_all
= FALSE
;
7530 discard_all
= will_discard
;
7533 #if MACH_ASSERT || DEBUG
7535 assert(hibernate_vm_locks_are_safe());
7536 vm_page_lock_queues();
7537 if (vm_page_local_q
) {
7538 zpercpu_foreach(lq
, vm_page_local_q
) {
7539 VPL_LOCK(&lq
->vpl_lock
);
7543 #endif /* MACH_ASSERT || DEBUG */
7546 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 8) | DBG_FUNC_START
, count_wire
, 0, 0, 0, 0);
7548 clock_get_uptime(&start
);
7551 hibernate_page_list_zero(page_list
);
7552 hibernate_page_list_zero(page_list_wired
);
7553 hibernate_page_list_zero(page_list_pal
);
7555 hibernate_stats
.cd_vm_page_wire_count
= vm_page_wire_count
;
7556 hibernate_stats
.cd_pages
= pages
;
7559 if (vm_page_local_q
) {
7560 zpercpu_foreach_cpu(lid
) {
7561 vm_page_reactivate_local(lid
, TRUE
, !preflight
);
7566 vm_object_lock(compressor_object
);
7567 vm_page_lock_queues();
7568 lck_mtx_lock(&vm_page_queue_free_lock
);
7571 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
7573 hibernation_vmqueues_inspection
= TRUE
;
7575 m
= (vm_page_t
) hibernate_gobble_queue
;
7580 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7581 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7587 percpu_foreach(free_pages_head
, free_pages
) {
7588 for (m
= *free_pages_head
; m
; m
= m
->vmp_snext
) {
7589 assert(m
->vmp_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
);
7593 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7594 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7596 hibernate_stats
.cd_local_free
++;
7597 hibernate_stats
.cd_total_free
++;
7602 for (i
= 0; i
< vm_colors
; i
++) {
7603 vm_page_queue_iterate(&vm_page_queue_free
[i
].qhead
, m
, vmp_pageq
) {
7604 assert(m
->vmp_q_state
== VM_PAGE_ON_FREE_Q
);
7609 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7610 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7612 hibernate_stats
.cd_total_free
++;
7617 vm_page_queue_iterate(&vm_lopage_queue_free
, m
, vmp_pageq
) {
7618 assert(m
->vmp_q_state
== VM_PAGE_ON_FREE_LOPAGE_Q
);
7623 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7624 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7626 hibernate_stats
.cd_total_free
++;
7630 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_throttled
);
7631 while (m
&& !vm_page_queue_end(&vm_page_queue_throttled
, (vm_page_queue_entry_t
)m
)) {
7632 assert(m
->vmp_q_state
== VM_PAGE_ON_THROTTLED_Q
);
7634 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7636 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
7637 && hibernate_consider_discard(m
, preflight
)) {
7639 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7641 count_discard_inactive
++;
7642 discard
= discard_all
;
7648 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7652 hibernate_discard_page(m
);
7657 m
= (vm_page_t
)vm_page_queue_first(&vm_page_queue_anonymous
);
7658 while (m
&& !vm_page_queue_end(&vm_page_queue_anonymous
, (vm_page_queue_entry_t
)m
)) {
7659 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
);
7661 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7663 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
) &&
7664 hibernate_consider_discard(m
, preflight
)) {
7666 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7669 count_discard_purgeable
++;
7671 count_discard_inactive
++;
7673 discard
= discard_all
;
7679 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7682 hibernate_discard_page(m
);
7687 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
7688 while (m
&& !vm_page_queue_end(&vm_page_queue_cleaned
, (vm_page_queue_entry_t
)m
)) {
7689 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
);
7691 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7693 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
) &&
7694 hibernate_consider_discard(m
, preflight
)) {
7696 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7699 count_discard_purgeable
++;
7701 count_discard_cleaned
++;
7703 discard
= discard_all
;
7709 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7712 hibernate_discard_page(m
);
7717 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
7718 while (m
&& !vm_page_queue_end(&vm_page_queue_active
, (vm_page_queue_entry_t
)m
)) {
7719 assert(m
->vmp_q_state
== VM_PAGE_ON_ACTIVE_Q
);
7721 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7723 if ((kIOHibernateModeDiscardCleanActive
& gIOHibernateMode
) &&
7724 hibernate_consider_discard(m
, preflight
)) {
7726 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7729 count_discard_purgeable
++;
7731 count_discard_active
++;
7733 discard
= discard_all
;
7739 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7742 hibernate_discard_page(m
);
7747 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
7748 while (m
&& !vm_page_queue_end(&vm_page_queue_inactive
, (vm_page_queue_entry_t
)m
)) {
7749 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
);
7751 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7753 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
) &&
7754 hibernate_consider_discard(m
, preflight
)) {
7756 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7759 count_discard_purgeable
++;
7761 count_discard_inactive
++;
7763 discard
= discard_all
;
7769 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7772 hibernate_discard_page(m
);
7776 /* XXX FBDP TODO: secluded queue */
7778 for (i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++) {
7779 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_speculative
[i
].age_q
);
7780 while (m
&& !vm_page_queue_end(&vm_page_queue_speculative
[i
].age_q
, (vm_page_queue_entry_t
)m
)) {
7781 assertf(m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
,
7782 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7783 m
, m
->vmp_pageq
.next
, m
->vmp_pageq
.prev
, i
, m
->vmp_q_state
, discard
, preflight
);
7785 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7787 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
) &&
7788 hibernate_consider_discard(m
, preflight
)) {
7790 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7792 count_discard_speculative
++;
7793 discard
= discard_all
;
7795 count_speculative
++;
7799 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7802 hibernate_discard_page(m
);
7808 vm_page_queue_iterate(&compressor_object
->memq
, m
, vmp_listq
) {
7809 assert(m
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
);
7814 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7818 if (preflight
== FALSE
&& discard_all
== TRUE
) {
7819 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 12) | DBG_FUNC_START
);
7821 HIBLOG("hibernate_teardown started\n");
7822 count_discard_vm_struct_pages
= hibernate_teardown_vm_structs(page_list
, page_list_wired
);
7823 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages
);
7825 pages
-= count_discard_vm_struct_pages
;
7826 count_wire
-= count_discard_vm_struct_pages
;
7828 hibernate_stats
.cd_vm_struct_pages_unneeded
= count_discard_vm_struct_pages
;
7830 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 12) | DBG_FUNC_END
);
7834 // pull wired from hibernate_bitmap
7835 bitmap
= &page_list
->bank_bitmap
[0];
7836 bitmap_wired
= &page_list_wired
->bank_bitmap
[0];
7837 for (bank
= 0; bank
< page_list
->bank_count
; bank
++) {
7838 for (i
= 0; i
< bitmap
->bitmapwords
; i
++) {
7839 bitmap
->bitmap
[i
] = bitmap
->bitmap
[i
] | ~bitmap_wired
->bitmap
[i
];
7841 bitmap
= (hibernate_bitmap_t
*)&bitmap
->bitmap
[bitmap
->bitmapwords
];
7842 bitmap_wired
= (hibernate_bitmap_t
*) &bitmap_wired
->bitmap
[bitmap_wired
->bitmapwords
];
7846 // machine dependent adjustments
7847 hibernate_page_list_setall_machine(page_list
, page_list_wired
, preflight
, &pages
);
7850 hibernate_stats
.cd_count_wire
= count_wire
;
7851 hibernate_stats
.cd_discarded
= count_discard_active
+ count_discard_inactive
+ count_discard_purgeable
+
7852 count_discard_speculative
+ count_discard_cleaned
+ count_discard_vm_struct_pages
;
7855 clock_get_uptime(&end
);
7856 absolutetime_to_nanoseconds(end
- start
, &nsec
);
7857 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec
/ 1000000ULL);
7859 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7860 pages
, count_wire
, count_active
, count_inactive
, count_cleaned
, count_speculative
, count_anonymous
, count_throttled
, count_compressor
, hibernate_stats
.cd_found_xpmapped
,
7861 discard_all
? "did" : "could",
7862 count_discard_active
, count_discard_inactive
, count_discard_purgeable
, count_discard_speculative
, count_discard_cleaned
);
7864 if (hibernate_stats
.cd_skipped_xpmapped
) {
7865 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats
.cd_skipped_xpmapped
);
7868 *pagesOut
= pages
- count_discard_active
- count_discard_inactive
- count_discard_purgeable
- count_discard_speculative
- count_discard_cleaned
;
7870 if (preflight
&& will_discard
) {
7871 *pagesOut
-= count_compressor
+ count_throttled
+ count_anonymous
+ count_inactive
+ count_cleaned
+ count_speculative
+ count_active
;
7873 * We try to keep max HIBERNATE_XPMAPPED_LIMIT pages around in the hibernation image
7874 * even if these are clean and so we need to size the hibernation image accordingly.
7876 * NB: We have to assume all HIBERNATE_XPMAPPED_LIMIT pages might show up because 'dirty'
7877 * xpmapped pages aren't distinguishable from other 'dirty' pages in preflight. So we might
7878 * only see part of the xpmapped pages if we look at 'cd_found_xpmapped' which solely tracks
7879 * clean xpmapped pages.
7881 * Since these pages are all cleaned by the time we are in the post-preflight phase, we might
7882 * see a much larger number in 'cd_found_xpmapped' now than we did in the preflight phase
7884 *pagesOut
+= HIBERNATE_XPMAPPED_LIMIT
;
7887 hibernation_vmqueues_inspection
= FALSE
;
7889 #if MACH_ASSERT || DEBUG
7891 if (vm_page_local_q
) {
7892 zpercpu_foreach(lq
, vm_page_local_q
) {
7893 VPL_UNLOCK(&lq
->vpl_lock
);
7896 vm_page_unlock_queues();
7898 #endif /* MACH_ASSERT || DEBUG */
7901 lck_mtx_unlock(&vm_page_queue_free_lock
);
7902 vm_page_unlock_queues();
7903 vm_object_unlock(compressor_object
);
7906 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 8) | DBG_FUNC_END
, count_wire
, *pagesOut
, 0, 0, 0);
7910 hibernate_page_list_discard(hibernate_page_list_t
* page_list
)
7912 uint64_t start
, end
, nsec
;
7916 uint32_t count_discard_active
= 0;
7917 uint32_t count_discard_inactive
= 0;
7918 uint32_t count_discard_purgeable
= 0;
7919 uint32_t count_discard_cleaned
= 0;
7920 uint32_t count_discard_speculative
= 0;
7923 #if MACH_ASSERT || DEBUG
7924 vm_page_lock_queues();
7925 if (vm_page_local_q
) {
7926 zpercpu_foreach(lq
, vm_page_local_q
) {
7927 VPL_LOCK(&lq
->vpl_lock
);
7930 #endif /* MACH_ASSERT || DEBUG */
7932 clock_get_uptime(&start
);
7934 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
7935 while (m
&& !vm_page_queue_end(&vm_page_queue_anonymous
, (vm_page_queue_entry_t
)m
)) {
7936 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
);
7938 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7939 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
))) {
7941 count_discard_purgeable
++;
7943 count_discard_inactive
++;
7945 hibernate_discard_page(m
);
7950 for (i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++) {
7951 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_speculative
[i
].age_q
);
7952 while (m
&& !vm_page_queue_end(&vm_page_queue_speculative
[i
].age_q
, (vm_page_queue_entry_t
)m
)) {
7953 assert(m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
);
7955 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7956 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
))) {
7957 count_discard_speculative
++;
7958 hibernate_discard_page(m
);
7964 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
7965 while (m
&& !vm_page_queue_end(&vm_page_queue_inactive
, (vm_page_queue_entry_t
)m
)) {
7966 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
);
7968 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7969 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
))) {
7971 count_discard_purgeable
++;
7973 count_discard_inactive
++;
7975 hibernate_discard_page(m
);
7979 /* XXX FBDP TODO: secluded queue */
7981 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
7982 while (m
&& !vm_page_queue_end(&vm_page_queue_active
, (vm_page_queue_entry_t
)m
)) {
7983 assert(m
->vmp_q_state
== VM_PAGE_ON_ACTIVE_Q
);
7985 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
7986 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
))) {
7988 count_discard_purgeable
++;
7990 count_discard_active
++;
7992 hibernate_discard_page(m
);
7997 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
7998 while (m
&& !vm_page_queue_end(&vm_page_queue_cleaned
, (vm_page_queue_entry_t
)m
)) {
7999 assert(m
->vmp_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
);
8001 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->vmp_pageq
.next
);
8002 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
))) {
8004 count_discard_purgeable
++;
8006 count_discard_cleaned
++;
8008 hibernate_discard_page(m
);
8013 #if MACH_ASSERT || DEBUG
8014 if (vm_page_local_q
) {
8015 zpercpu_foreach(lq
, vm_page_local_q
) {
8016 VPL_UNLOCK(&lq
->vpl_lock
);
8019 vm_page_unlock_queues();
8020 #endif /* MACH_ASSERT || DEBUG */
8022 clock_get_uptime(&end
);
8023 absolutetime_to_nanoseconds(end
- start
, &nsec
);
8024 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
8026 count_discard_active
, count_discard_inactive
, count_discard_purgeable
, count_discard_speculative
, count_discard_cleaned
);
boolean_t       hibernate_paddr_map_inited = FALSE;
unsigned int    hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t       hibernate_rebuild_hash_list = NULL;

unsigned int    hibernate_teardown_found_tabled_pages = 0;
unsigned int    hibernate_teardown_found_created_pages = 0;
unsigned int    hibernate_teardown_found_free_pages = 0;
unsigned int    hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
    struct ppnum_mapping    *ppnm_next;
    ppnum_t                 ppnm_base_paddr;
    unsigned int            ppnm_sindx;
    unsigned int            ppnm_eindx;
};

struct ppnum_mapping        *ppnm_head;
struct ppnum_mapping        *ppnm_last_found = NULL;
void
hibernate_create_paddr_map(void)
{
    unsigned int            i;
    ppnum_t                 next_ppnum_in_run = 0;
    struct ppnum_mapping    *ppnm = NULL;

    if (hibernate_paddr_map_inited == FALSE) {
        for (i = 0; i < vm_pages_count; i++) {
            if (ppnm) {
                ppnm->ppnm_eindx = i;
            }
            if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
                ppnm = zalloc_permanent_type(struct ppnum_mapping);

                ppnm->ppnm_next = ppnm_head;
                ppnm_head = ppnm;

                ppnm->ppnm_sindx = i;
                ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
            }
            next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
        }
        ppnm->ppnm_eindx = vm_pages_count;

        hibernate_paddr_map_inited = TRUE;
    }
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
    struct ppnum_mapping *ppnm = NULL;

    ppnm = ppnm_last_found;

    if (ppnm) {
        if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
            goto done;
        }
    }
    for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
        if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
            ppnm_last_found = ppnm;
            break;
        }
    }
    if (ppnm == NULL) {
        panic("hibernate_lookup_paddr of %d failed\n", indx);
    }
done:
    return ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx);
}
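
/*
 * Sketch of the run-length index-to-physical-page mapping used by
 * hibernate_lookup_paddr() above: each run records the first vm_pages[]
 * index it covers and the physical page of that first entry, so a hit is
 * just "base + offset into the run".  The runs and 'sketch_' names below
 * are invented for illustration.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

struct sketch_run {
    unsigned int sindx;      /* first index covered (inclusive)  */
    unsigned int eindx;      /* last index covered (exclusive)   */
    uint32_t     base_paddr; /* physical page of vm_pages[sindx] */
};

static uint32_t
sketch_lookup_paddr(const struct sketch_run *runs, int nruns, unsigned int indx)
{
    for (int i = 0; i < nruns; i++) {
        if (indx >= runs[i].sindx && indx < runs[i].eindx) {
            return runs[i].base_paddr + (indx - runs[i].sindx);
        }
    }
    return 0; /* the kernel panics here instead */
}

int
main(void)
{
    struct sketch_run runs[] = {
        { 0,   100, 0x1000 },   /* one contiguous physical run         */
        { 100, 160, 0x8000 },   /* next run starts at a different base */
    };

    printf("index 42  -> ppnum 0x%x\n", sketch_lookup_paddr(runs, 2, 42));
    printf("index 130 -> ppnum 0x%x\n", sketch_lookup_paddr(runs, 2, 130));
    return 0;
}
#endif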
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
    addr64_t        saddr_aligned;
    addr64_t        eaddr_aligned;
    addr64_t        addr;
    ppnum_t         paddr;
    unsigned int    mark_as_unneeded_pages = 0;

    saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
    eaddr_aligned = eaddr & ~PAGE_MASK_64;

    for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
        paddr = pmap_find_phys(kernel_pmap, addr);

        assert(paddr);

        hibernate_page_bitset(page_list, TRUE, paddr);
        hibernate_page_bitset(page_list_wired, TRUE, paddr);

        mark_as_unneeded_pages++;
    }
    return mark_as_unneeded_pages;
}
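
/*
 * Sketch of the alignment step at the top of hibernate_mark_as_unneeded():
 * round the start address up and the end address down so that only pages
 * wholly contained in the range are marked.  A 4K page size is assumed here
 * purely for illustration.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_MASK 0xFFFULL

int
main(void)
{
    uint64_t saddr = 0x10000800;    /* range starts mid-page */
    uint64_t eaddr = 0x10004200;    /* range ends mid-page   */
    uint64_t s_aligned = (saddr + SKETCH_PAGE_MASK) & ~SKETCH_PAGE_MASK;
    uint64_t e_aligned = eaddr & ~SKETCH_PAGE_MASK;

    printf("whole pages: [0x%llx, 0x%llx) -> %llu pages unneeded\n",
        (unsigned long long)s_aligned, (unsigned long long)e_aligned,
        (unsigned long long)((e_aligned - s_aligned) >> 12));
    return 0;
}
#endif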
void
hibernate_hash_insert_page(vm_page_t mem)
{
    vm_page_bucket_t    *bucket;
    int                 hash_id;
    vm_object_t         m_object;

    m_object = VM_PAGE_OBJECT(mem);

    assert(mem->vmp_hashed);
    assert(mem->vmp_offset != (vm_object_offset_t) -1);

    /*
     * Insert it into the object_object/offset hash table
     */
    hash_id = vm_page_hash(m_object, mem->vmp_offset);
    bucket = &vm_page_buckets[hash_id];

    mem->vmp_next_m = bucket->page_list;
    bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
void
hibernate_free_range(int sindx, int eindx)
{
    vm_page_t       mem;
    unsigned int    color;

    while (sindx < eindx) {
        mem = &vm_pages[sindx];

        vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

        mem->vmp_lopage = FALSE;
        mem->vmp_q_state = VM_PAGE_ON_FREE_Q;

        color = VM_PAGE_GET_COLOR(mem);
#if defined(__x86_64__)
        vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
#else
        vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
#endif
        vm_page_free_count++;

        sindx++;
    }
}
8183 hibernate_rebuild_vm_structs(void)
8185 int i
, cindx
, sindx
, eindx
;
8186 vm_page_t mem
, tmem
, mem_next
;
8187 AbsoluteTime startTime
, endTime
;
8190 if (hibernate_rebuild_needed
== FALSE
) {
8194 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 13) | DBG_FUNC_START
);
8195 HIBLOG("hibernate_rebuild started\n");
8197 clock_get_uptime(&startTime
);
8199 pal_hib_rebuild_pmap_structs();
8201 bzero(&vm_page_buckets
[0], vm_page_bucket_count
* sizeof(vm_page_bucket_t
));
8202 eindx
= vm_pages_count
;
8205 * Mark all the vm_pages[] that have not been initialized yet as being
8206 * transient. This is needed to ensure that buddy page search is corrrect.
8207 * Without this random data in these vm_pages[] can trip the buddy search
8209 for (i
= hibernate_teardown_last_valid_compact_indx
+ 1; i
< eindx
; ++i
) {
8210 vm_pages
[i
].vmp_q_state
= VM_PAGE_NOT_ON_Q
;
8213 for (cindx
= hibernate_teardown_last_valid_compact_indx
; cindx
>= 0; cindx
--) {
8214 mem
= &vm_pages
[cindx
];
8215 assert(mem
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
);
8217 * hibernate_teardown_vm_structs leaves the location where
8218 * this vm_page_t must be located in "next".
8220 tmem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->vmp_next_m
));
8221 mem
->vmp_next_m
= VM_PAGE_PACK_PTR(NULL
);
8223 sindx
= (int)(tmem
- &vm_pages
[0]);
8227 * this vm_page_t was moved by hibernate_teardown_vm_structs,
8228 * so move it back to its real location
8233 if (mem
->vmp_hashed
) {
8234 hibernate_hash_insert_page(mem
);
8237 * the 'hole' between this vm_page_t and the previous
8238 * vm_page_t we moved needs to be initialized as
8239 * a range of free vm_page_t's
8241 hibernate_free_range(sindx
+ 1, eindx
);
8246 hibernate_free_range(0, sindx
);
8249 assert(vm_page_free_count
== hibernate_teardown_vm_page_free_count
);
8252 * process the list of vm_page_t's that were entered in the hash,
8253 * but were not located in the vm_pages arrary... these are
8254 * vm_page_t's that were created on the fly (i.e. fictitious)
8256 for (mem
= hibernate_rebuild_hash_list
; mem
; mem
= mem_next
) {
8257 mem_next
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->vmp_next_m
));
8259 mem
->vmp_next_m
= 0;
8260 hibernate_hash_insert_page(mem
);
8262 hibernate_rebuild_hash_list
= NULL
;
8264 clock_get_uptime(&endTime
);
8265 SUB_ABSOLUTETIME(&endTime
, &startTime
);
8266 absolutetime_to_nanoseconds(endTime
, &nsec
);
8268 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec
/ 1000000ULL);
8270 hibernate_rebuild_needed
= FALSE
;
8272 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 13) | DBG_FUNC_END
);
8276 hibernate_teardown_vm_structs(hibernate_page_list_t
*page_list
, hibernate_page_list_t
*page_list_wired
)
8279 unsigned int compact_target_indx
;
8280 vm_page_t mem
, mem_next
;
8281 vm_page_bucket_t
*bucket
;
8282 unsigned int mark_as_unneeded_pages
= 0;
8283 unsigned int unneeded_vm_page_bucket_pages
= 0;
8284 unsigned int unneeded_vm_pages_pages
= 0;
8285 unsigned int unneeded_pmap_pages
= 0;
8286 addr64_t start_of_unneeded
= 0;
8287 addr64_t end_of_unneeded
= 0;
8290 if (hibernate_should_abort()) {
8294 hibernate_rebuild_needed
= TRUE
;
8296 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
8297 vm_page_wire_count
, vm_page_free_count
, vm_page_active_count
, vm_page_inactive_count
, vm_page_speculative_count
,
8298 vm_page_cleaned_count
, compressor_object
->resident_page_count
);
8300 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
8301 bucket
= &vm_page_buckets
[i
];
8303 for (mem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
)); mem
!= VM_PAGE_NULL
; mem
= mem_next
) {
8304 assert(mem
->vmp_hashed
);
8306 mem_next
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->vmp_next_m
));
8308 if (mem
< &vm_pages
[0] || mem
>= &vm_pages
[vm_pages_count
]) {
8309 mem
->vmp_next_m
= VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list
);
8310 hibernate_rebuild_hash_list
= mem
;
8314 unneeded_vm_page_bucket_pages
= hibernate_mark_as_unneeded((addr64_t
)&vm_page_buckets
[0], (addr64_t
)&vm_page_buckets
[vm_page_bucket_count
], page_list
, page_list_wired
);
8315 mark_as_unneeded_pages
+= unneeded_vm_page_bucket_pages
;
8317 hibernate_teardown_vm_page_free_count
= vm_page_free_count
;
8319 compact_target_indx
= 0;
8321 for (i
= 0; i
< vm_pages_count
; i
++) {
8324 if (mem
->vmp_q_state
== VM_PAGE_ON_FREE_Q
) {
8327 assert(mem
->vmp_busy
);
8328 assert(!mem
->vmp_lopage
);
8330 color
= VM_PAGE_GET_COLOR(mem
);
8332 vm_page_queue_remove(&vm_page_queue_free
[color
].qhead
, mem
, vmp_pageq
);
8334 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
8336 vm_page_free_count
--;
8338 hibernate_teardown_found_free_pages
++;
8340 if (vm_pages
[compact_target_indx
].vmp_q_state
!= VM_PAGE_ON_FREE_Q
) {
8341 compact_target_indx
= i
;
8345 * record this vm_page_t's original location
8346 * we need this even if it doesn't get moved
8347 * as an indicator to the rebuild function that
8348 * we don't have to move it
8350 mem
->vmp_next_m
= VM_PAGE_PACK_PTR(mem
);
8352 if (vm_pages
[compact_target_indx
].vmp_q_state
== VM_PAGE_ON_FREE_Q
) {
8354 * we've got a hole to fill, so
8355 * move this vm_page_t to it's new home
8357 vm_pages
[compact_target_indx
] = *mem
;
8358 mem
->vmp_q_state
= VM_PAGE_ON_FREE_Q
;
8360 hibernate_teardown_last_valid_compact_indx
= compact_target_indx
;
8361 compact_target_indx
++;
8363 hibernate_teardown_last_valid_compact_indx
= i
;
8367 unneeded_vm_pages_pages
= hibernate_mark_as_unneeded((addr64_t
)&vm_pages
[hibernate_teardown_last_valid_compact_indx
+ 1],
8368 (addr64_t
)&vm_pages
[vm_pages_count
- 1], page_list
, page_list_wired
);
8369 mark_as_unneeded_pages
+= unneeded_vm_pages_pages
;
8371 pal_hib_teardown_pmap_structs(&start_of_unneeded
, &end_of_unneeded
);
8373 if (start_of_unneeded
) {
8374 unneeded_pmap_pages
= hibernate_mark_as_unneeded(start_of_unneeded
, end_of_unneeded
, page_list
, page_list_wired
);
8375 mark_as_unneeded_pages
+= unneeded_pmap_pages
;
8377 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages
, unneeded_vm_pages_pages
, unneeded_pmap_pages
);
8379 return mark_as_unneeded_pages
;
8383 #endif /* HIBERNATION */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
    hash_info_bucket_t  *info,
    unsigned int        count)
{
    unsigned int    i;
    lck_spin_t      *bucket_lock;

    if (vm_page_bucket_count < count) {
        count = vm_page_bucket_count;
    }

    for (i = 0; i < count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];
        unsigned int bucket_count = 0;
        vm_page_t m;

        bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
        lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);

        for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
            m != VM_PAGE_NULL;
            m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m))) {
            bucket_count++;
        }
        lck_spin_unlock(bucket_lock);

        /* don't touch pageable memory while holding locks */
        info[i].hib_count = bucket_count;
    }

    return vm_page_bucket_count;
}
#endif /* MACH_VM_DEBUG */
8440 #if VM_PAGE_BUCKETS_CHECK
8442 vm_page_buckets_check(void)
8446 unsigned int p_hash
;
8447 vm_page_bucket_t
*bucket
;
8448 lck_spin_t
*bucket_lock
;
8450 if (!vm_page_buckets_check_ready
) {
8455 if (hibernate_rebuild_needed
||
8456 hibernate_rebuild_hash_list
) {
8457 panic("BUCKET_CHECK: hibernation in progress: "
8458 "rebuild_needed=%d rebuild_hash_list=%p\n",
8459 hibernate_rebuild_needed
,
8460 hibernate_rebuild_hash_list
);
8462 #endif /* HIBERNATION */
8464 #if VM_PAGE_FAKE_BUCKETS
8466 for (cp
= (char *) vm_page_fake_buckets_start
;
8467 cp
< (char *) vm_page_fake_buckets_end
;
8470 panic("BUCKET_CHECK: corruption at %p in fake buckets "
8471 "[0x%llx:0x%llx]\n",
8473 (uint64_t) vm_page_fake_buckets_start
,
8474 (uint64_t) vm_page_fake_buckets_end
);
8477 #endif /* VM_PAGE_FAKE_BUCKETS */
8479 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
8480 vm_object_t p_object
;
8482 bucket
= &vm_page_buckets
[i
];
8483 if (!bucket
->page_list
) {
8487 bucket_lock
= &vm_page_bucket_locks
[i
/ BUCKETS_PER_LOCK
];
8488 lck_spin_lock_grp(bucket_lock
, &vm_page_lck_grp_bucket
);
8489 p
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
));
8491 while (p
!= VM_PAGE_NULL
) {
8492 p_object
= VM_PAGE_OBJECT(p
);
8494 if (!p
->vmp_hashed
) {
8495 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
8496 "hash %d in bucket %d at %p "
8498 p
, p_object
, p
->vmp_offset
,
8501 p_hash
= vm_page_hash(p_object
, p
->vmp_offset
);
8503 panic("BUCKET_CHECK: corruption in bucket %d "
8504 "at %p: page %p object %p offset 0x%llx "
8506 i
, bucket
, p
, p_object
, p
->vmp_offset
,
8509 p
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(p
->vmp_next_m
));
8511 lck_spin_unlock(bucket_lock
);
8514 // printf("BUCKET_CHECK: checked buckets\n");
8516 #endif /* VM_PAGE_BUCKETS_CHECK */
/*
 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
 * local queues if they exist... it's the only spot in the system where we add pages
 * to those queues...  once on those queues, those pages can only move to one of the
 * global page queues or the free queues... they NEVER move from local q to local q.
 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
 * the global vm_page_queue_lock at this point...  we still need to take the local lock
 * in case this operation is being run on a different CPU than the local queue's identity,
 * but we don't have to worry about the page moving to a global queue or becoming wired
 * while we're grabbing the local lock since those operations would require the global
 * vm_page_queue_lock to be held, and we already own it.
 *
 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
 * 'wired' and local are ALWAYS mutually exclusive conditions.
 */
#if CONFIG_BACKGROUND_QUEUE
void
vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
#else
void
vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
#endif
{
	boolean_t       was_pageable = TRUE;
	vm_object_t     m_object;

	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q) {
		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		if (remove_from_backgroundq == TRUE) {
			vm_page_remove_from_backgroundq(mem);
		}
		if (mem->vmp_on_backgroundq) {
			assert(mem->vmp_backgroundq.next != 0);
			assert(mem->vmp_backgroundq.prev != 0);
		} else {
			assert(mem->vmp_backgroundq.next == 0);
			assert(mem->vmp_backgroundq.prev == 0);
		}
#endif /* CONFIG_BACKGROUND_QUEUE */
		return;
	}

	if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vmp_backgroundq.next == 0 &&
		    mem->vmp_backgroundq.prev == 0 &&
		    mem->vmp_on_backgroundq == FALSE);
#endif /* CONFIG_BACKGROUND_QUEUE */
		return;
	}

	if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
		/*
		 * might put these guys on a list for debugging purposes
		 * if we do, we'll need to remove this assert
		 */
		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vmp_backgroundq.next == 0 &&
		    mem->vmp_backgroundq.prev == 0 &&
		    mem->vmp_on_backgroundq == FALSE);
#endif /* CONFIG_BACKGROUND_QUEUE */
		return;
	}

	assert(m_object != compressor_object);
	assert(m_object != kernel_object);
	assert(m_object != vm_submap_object);
	assert(!mem->vmp_fictitious);

	switch (mem->vmp_q_state) {
	case VM_PAGE_ON_ACTIVE_LOCAL_Q:
	{
		struct vpl     *lq;

		lq = zpercpu_get_cpu(vm_page_local_q, mem->vmp_local_id);
		VPL_LOCK(&lq->vpl_lock);
		vm_page_queue_remove(&lq->vpl_queue, mem, vmp_pageq);
		mem->vmp_local_id = 0;

		if (m_object->internal) {
			lq->vpl_internal_count--;
		} else {
			lq->vpl_external_count--;
		}
		VPL_UNLOCK(&lq->vpl_lock);
		was_pageable = FALSE;
		break;
	}
	case VM_PAGE_ON_ACTIVE_Q:
	{
		vm_page_queue_remove(&vm_page_queue_active, mem, vmp_pageq);
		vm_page_active_count--;
		break;
	}

	case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
	{
		assert(m_object->internal == TRUE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_anonymous, mem, vmp_pageq);
		vm_page_anonymous_count--;

		vm_purgeable_q_advance_all();
		vm_page_balance_inactive(3);
		break;
	}

	case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_inactive, mem, vmp_pageq);
		vm_purgeable_q_advance_all();
		vm_page_balance_inactive(3);
		break;
	}

	case VM_PAGE_ON_INACTIVE_CLEANED_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_cleaned, mem, vmp_pageq);
		vm_page_cleaned_count--;
		vm_page_balance_inactive(3);
		break;
	}

	case VM_PAGE_ON_THROTTLED_Q:
	{
		assert(m_object->internal == TRUE);

		vm_page_queue_remove(&vm_page_queue_throttled, mem, vmp_pageq);
		vm_page_throttled_count--;
		was_pageable = FALSE;
		break;
	}

	case VM_PAGE_ON_SPECULATIVE_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_remque(&mem->vmp_pageq);
		vm_page_speculative_count--;
		vm_page_balance_inactive(3);
		break;
	}

#if CONFIG_SECLUDED_MEMORY
	case VM_PAGE_ON_SECLUDED_Q:
	{
		vm_page_queue_remove(&vm_page_queue_secluded, mem, vmp_pageq);
		vm_page_secluded_count--;
		VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
		if (m_object == VM_OBJECT_NULL) {
			vm_page_secluded_count_free--;
			was_pageable = FALSE;
		} else {
			assert(!m_object->internal);
			vm_page_secluded_count_inuse--;
			was_pageable = FALSE;
//			was_pageable = TRUE;
		}
		break;
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	default:
	{
		/*
		 *	if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
		 *		NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
		 *		the caller is responsible for determining if the page is on that queue, and if so, must
		 *		either first remove it (it needs both the page queues lock and the object lock to do
		 *		this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
		 *
		 *	we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
		 *	or any of the undefined states
		 */
		panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
		break;
	}
	}
	VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
	mem->vmp_q_state = VM_PAGE_NOT_ON_Q;

#if CONFIG_BACKGROUND_QUEUE
	if (remove_from_backgroundq == TRUE) {
		vm_page_remove_from_backgroundq(mem);
	}
#endif /* CONFIG_BACKGROUND_QUEUE */

	if (was_pageable) {
		if (m_object->internal) {
			vm_page_pageable_internal_count--;
		} else {
			vm_page_pageable_external_count--;
		}
	}
}
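
/*
 * Illustrative sketch (not part of the original source): the locking
 * discipline described in the comment above vm_page_queues_remove() -- the
 * caller must already hold the global vm_page_queue_lock, and pages on the
 * pageout queue must be dealt with before the call. The helper below is
 * hypothetical and exists only to show the intended calling pattern.
 */
#if 0
static void
example_pull_page_off_its_queue(vm_page_t m)
{
	vm_page_lock_queues();          /* takes the global vm_page_queue_lock */

	/* the caller, not vm_page_queues_remove(), must handle the pageout queue */
	assert(m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);

	vm_page_queues_remove(m, TRUE); /* TRUE: also drop it from the background queue */

	vm_page_unlock_queues();
}
#endif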
void
vm_page_remove_internal(vm_page_t page)
{
	vm_object_t __object = VM_PAGE_OBJECT(page);

	if (page == __object->memq_hint) {
		vm_page_t               __new_hint;
		vm_page_queue_entry_t   __qe;

		__qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
		if (vm_page_queue_end(&__object->memq, __qe)) {
			__qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
			if (vm_page_queue_end(&__object->memq, __qe)) {
				__qe = NULL;
			}
		}
		__new_hint = (vm_page_t)((uintptr_t) __qe);
		__object->memq_hint = __new_hint;
	}
	vm_page_queue_remove(&__object->memq, page, vmp_listq);
#if CONFIG_SECLUDED_MEMORY
	if (__object->eligible_for_secluded) {
		vm_page_secluded.eligible_for_secluded--;
	}
#endif /* CONFIG_SECLUDED_MEMORY */
}
void
vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
{
	vm_object_t     m_object;

	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert(!mem->vmp_fictitious);
	assert(!mem->vmp_laundry);
	assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
	vm_page_check_pageable_safe(mem);

	if (m_object->internal) {
		mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;

		if (first == TRUE) {
			vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vmp_pageq);
		} else {
			vm_page_queue_enter(&vm_page_queue_anonymous, mem, vmp_pageq);
		}

		vm_page_anonymous_count++;
		vm_page_pageable_internal_count++;
	} else {
		mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;

		if (first == TRUE) {
			vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vmp_pageq);
		} else {
			vm_page_queue_enter(&vm_page_queue_inactive, mem, vmp_pageq);
		}

		vm_page_pageable_external_count++;
	}
	vm_page_inactive_count++;
	token_new_pagecount++;

#if CONFIG_BACKGROUND_QUEUE
	if (mem->vmp_in_background) {
		vm_page_add_to_backgroundq(mem, FALSE);
	}
#endif /* CONFIG_BACKGROUND_QUEUE */
}
void
vm_page_enqueue_active(vm_page_t mem, boolean_t first)
{
	vm_object_t     m_object;

	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert(!mem->vmp_fictitious);
	assert(!mem->vmp_laundry);
	assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
	vm_page_check_pageable_safe(mem);

	mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
	if (first == TRUE) {
		vm_page_queue_enter_first(&vm_page_queue_active, mem, vmp_pageq);
	} else {
		vm_page_queue_enter(&vm_page_queue_active, mem, vmp_pageq);
	}
	vm_page_active_count++;

	if (m_object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}

#if CONFIG_BACKGROUND_QUEUE
	if (mem->vmp_in_background) {
		vm_page_add_to_backgroundq(mem, FALSE);
	}
#endif /* CONFIG_BACKGROUND_QUEUE */
	vm_page_balance_inactive(3);
}
/*
 * Pages from special kernel objects shouldn't
 * be placed on pageable queues.
 */
void
vm_page_check_pageable_safe(vm_page_t page)
{
	vm_object_t     page_object;

	page_object = VM_PAGE_OBJECT(page);

	if (page_object == kernel_object) {
		panic("vm_page_check_pageable_safe: trying to add page " \
		    "from kernel object (%p) to pageable queue", kernel_object);
	}

	if (page_object == compressor_object) {
		panic("vm_page_check_pageable_safe: trying to add page " \
		    "from compressor object (%p) to pageable queue", compressor_object);
	}

	if (page_object == vm_submap_object) {
		panic("vm_page_check_pageable_safe: trying to add page " \
		    "from submap object (%p) to pageable queue", vm_submap_object);
	}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * wired page diagnose
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <libkern/OSKextLibPrivate.h>

#define KA_SIZE(namelen, subtotalscount)        \
	(sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))

#define KA_NAME(alloc)  \
	((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))

#define KA_NAME_LEN(alloc)      \
	(VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
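
/*
 * Illustrative sketch (not part of the original source): the single-block
 * layout that KA_SIZE()/KA_NAME() describe -- a vm_allocation_site header,
 * followed by 'subtotalscount' vm_allocation_total slots, followed by the
 * NUL-terminated name. The diagram is a worked illustration only.
 *
 *   +---------------------------+  <- (vm_allocation_site_t *)alloc
 *   | struct vm_allocation_site |
 *   +---------------------------+  <- alloc->subtotals[0]
 *   | vm_allocation_total [0]   |
 *   | ...                       |
 *   | vm_allocation_total [n-1] |
 *   +---------------------------+  <- KA_NAME(alloc)
 *   | name bytes ... '\0'       |
 *   +---------------------------+
 *
 * so KA_SIZE(namelen, n) == sizeof(header) + n * sizeof(total) + namelen + 1,
 * and the name length is stashed in the upper bits of alloc->flags so
 * KA_NAME_LEN() can recover it without a strlen().
 */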
vm_tag_t
vm_tag_bt(void)
{
	uintptr_t* frameptr;
	uintptr_t* frameptr_next;
	uintptr_t retaddr;
	uintptr_t kstackb, kstackt;
	const vm_allocation_site_t * site;
	thread_t cthread;
	kern_allocation_name_t name;

	cthread = current_thread();
	if (__improbable(cthread == NULL)) {
		return VM_KERN_MEMORY_OSFMK;
	}

	if ((name = thread_get_kernel_state(cthread)->allocation_name)) {
		if (name->tag) {
			return name->tag;
		}
		return VM_KERN_MEMORY_KALLOC;
	}

	kstackb = cthread->kernel_stack;
	kstackt = kstackb + kernel_stack_size;

	/* Load stack frame pointer (EBP on x86) into frameptr */
	frameptr = __builtin_frame_address(0);

	site = NULL;
	while (frameptr != NULL) {
		/* Verify thread stack bounds */
		if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) {
			break;
		}

		/* Next frame pointer is pointed to by the previous one */
		frameptr_next = (uintptr_t*) *frameptr;

		/* Pull return address from one spot above the frame pointer */
		retaddr = *(frameptr + 1);

#if defined(HAS_APPLE_PAC)
		retaddr = (uintptr_t) ptrauth_strip((void *)retaddr, ptrauth_key_return_address);
#endif

		if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
		    || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top)) {
			site = OSKextGetAllocationSiteForCaller(retaddr);
			break;
		}
		frameptr = frameptr_next;
	}

	return site ? site->tag : VM_KERN_MEMORY_NONE;
}
static uint64_t free_tag_bits[VM_MAX_TAG_VALUE / 64];

static void
vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
{
	vm_tag_t               tag;
	uint64_t               avail;
	uint32_t               idx;
	vm_allocation_site_t * prev;

	if (site->tag) {
		return;
	}

	idx = 0;
	while (TRUE) {
		avail = free_tag_bits[idx];
		if (avail) {
			tag = (vm_tag_t)__builtin_clzll(avail);
			avail &= ~(1ULL << (63 - tag));
			free_tag_bits[idx] = avail;
			tag += (vm_tag_t)(idx << 6);
			break;
		}
		idx++;
		if (idx >= ARRAY_COUNT(free_tag_bits)) {
			for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++) {
				prev = vm_allocation_sites[idx];
				if (!prev) {
					continue;
				}
				if (!KA_NAME_LEN(prev)) {
					continue;
				}
				if (!prev->tag) {
					continue;
				}
				if (prev->total) {
					continue;
				}
				if (1 != prev->refcount) {
					continue;
				}

				assert(idx == prev->tag);
				tag = (vm_tag_t)idx;
				prev->tag = VM_KERN_MEMORY_NONE;
				*releasesiteP = prev;
				break;
			}
			if (idx >= ARRAY_COUNT(vm_allocation_sites)) {
				tag = VM_KERN_MEMORY_ANY;
			}
			break;
		}
	}
	assert(VM_KERN_MEMORY_NONE != tag);

	site->tag = tag;
	OSAddAtomic16(1, &site->refcount);

	if (VM_KERN_MEMORY_ANY != tag) {
		vm_allocation_sites[tag] = site;
	}

	if (tag > vm_allocation_tag_highest) {
		vm_allocation_tag_highest = tag;
	}
}
static void
vm_tag_free_locked(vm_tag_t tag)
{
	uint64_t avail;
	uint32_t idx;
	uint64_t bit;

	if (VM_KERN_MEMORY_ANY == tag) {
		return;
	}

	idx = (tag >> 6);
	avail = free_tag_bits[idx];
	tag &= 63;
	bit = (1ULL << (63 - tag));
	assert(!(avail & bit));
	free_tag_bits[idx] = (avail | bit);
}

static void
vm_tag_init(void)
{
	vm_tag_t tag;

	for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++) {
		vm_tag_free_locked(tag);
	}

	for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++) {
		vm_tag_free_locked(tag);
	}
}
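
/*
 * Illustrative worked example (not part of the original source): how a tag
 * value maps onto free_tag_bits[]. Each 64-bit word covers 64 tags; while a
 * tag is free, bit (63 - (tag & 63)) of word (tag >> 6) is set, which is why
 * vm_tag_alloc_locked() can recover the lowest free tag in a word with
 * __builtin_clzll(). For instance, tag 70 lives in word 70 >> 6 == 1, at bit
 * position 63 - (70 & 63) == 57.
 */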
vm_tag_t
vm_tag_alloc(vm_allocation_site_t * site)
{
	vm_tag_t               tag;
	vm_allocation_site_t * releasesite;

	if (VM_TAG_BT & site->flags) {
		tag = vm_tag_bt();
		if (VM_KERN_MEMORY_NONE != tag) {
			return tag;
		}
	}

	if (!site->tag) {
		releasesite = NULL;
		lck_spin_lock(&vm_allocation_sites_lock);
		vm_tag_alloc_locked(site, &releasesite);
		lck_spin_unlock(&vm_allocation_sites_lock);
		if (releasesite) {
			kern_allocation_name_release(releasesite);
		}
	}

	return site->tag;
}
void
vm_tag_update_size(vm_tag_t tag, int64_t delta)
{
	vm_allocation_site_t * allocation;
	uint64_t prior;

	assert(VM_KERN_MEMORY_NONE != tag);
	assert(tag < VM_MAX_TAG_VALUE);

	allocation = vm_allocation_sites[tag];
	assert(allocation);

	if (delta < 0) {
		assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
	}
	prior = OSAddAtomic64(delta, &allocation->total);

#if DEBUG || DEVELOPMENT
	uint64_t new, peak;

	new = prior + delta;
	do {
		peak = allocation->peak;
		if (new <= peak) {
			break;
		}
	} while (!OSCompareAndSwap64(peak, new, &allocation->peak));
#endif /* DEBUG || DEVELOPMENT */

	if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) {
		return;
	}

	if (!prior && !allocation->tag) {
		vm_tag_alloc(allocation);
	}
}
void
kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
{
	uint64_t prior;

	if (delta < 0) {
		assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
	}
	prior = OSAddAtomic64(delta, &allocation->total);

#if DEBUG || DEVELOPMENT
	uint64_t new, peak;

	new = prior + delta;
	do {
		peak = allocation->peak;
		if (new <= peak) {
			break;
		}
	} while (!OSCompareAndSwap64(peak, new, &allocation->peak));
#endif /* DEBUG || DEVELOPMENT */

	if (!prior && !allocation->tag) {
		vm_tag_alloc(allocation);
	}
}
#if VM_MAX_TAG_ZONES

static void
vm_allocation_zones_init(void)
{
	kern_return_t ret;
	vm_offset_t   addr;
	vm_size_t     size;

	size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *)
	    + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);

	ret = kernel_memory_allocate(kernel_map,
	    &addr, round_page(size), 0,
	    KMA_ZERO, VM_KERN_MEMORY_DIAG);
	assert(KERN_SUCCESS == ret);

	vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
	addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *);

	// prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
	// in vm_tag_update_zone_size() won't recurse
	vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
	addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
	vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
}
__attribute__((noinline))
static vm_tag_t
vm_tag_zone_stats_alloc(vm_tag_t tag, zalloc_flags_t flags)
{
	vm_allocation_zone_total_t *stats;
	vm_size_t size = sizeof(*stats) * VM_MAX_TAG_ZONES;

	stats = kheap_alloc(KHEAP_DATA_BUFFERS, size,
	    Z_VM_TAG(VM_KERN_MEMORY_DIAG) | Z_ZERO | flags);
	if (!stats) {
		return VM_KERN_MEMORY_NONE;
	}
	if (!os_atomic_cmpxchg(&vm_allocation_zone_totals[tag], NULL, stats, release)) {
		kheap_free(KHEAP_DATA_BUFFERS, stats, size);
	}
	return tag;
}
vm_tag_t
vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx, uint32_t zflags)
{
	assert(VM_KERN_MEMORY_NONE != tag);
	assert(tag < VM_MAX_TAG_VALUE);

	if (zidx >= VM_MAX_TAG_ZONES) {
		return VM_KERN_MEMORY_NONE;
	}

	if (__probable(vm_allocation_zone_totals[tag])) {
		return tag;
	}
	return vm_tag_zone_stats_alloc(tag, zflags);
}
void
vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, long delta)
{
	vm_allocation_zone_total_t *stats;
	vm_size_t value;

	assert(VM_KERN_MEMORY_NONE != tag);
	assert(tag < VM_MAX_TAG_VALUE);

	if (zidx >= VM_MAX_TAG_ZONES) {
		return;
	}

	stats = vm_allocation_zone_totals[tag];
	assert(stats);
	stats += zidx;

	value = os_atomic_add(&stats->vazt_total, delta, relaxed);
	if (delta < 0) {
		assertf((long)value >= 0, "zidx %d, tag %d, %p", zidx, tag, stats);
	} else if (os_atomic_load(&stats->vazt_peak, relaxed) < value) {
		os_atomic_max(&stats->vazt_peak, value, relaxed);
	}
}

#endif /* VM_MAX_TAG_ZONES */
void
kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
{
	kern_allocation_name_t       other;
	struct vm_allocation_total * total;
	uint32_t                     subidx;

	subidx = 0;
	assert(VM_KERN_MEMORY_NONE != subtag);
	lck_spin_lock(&vm_allocation_sites_lock);
	for (; subidx < allocation->subtotalscount; subidx++) {
		if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag) {
			allocation->subtotals[subidx].tag = (vm_tag_t)subtag;
			break;
		}
		if (subtag == allocation->subtotals[subidx].tag) {
			break;
		}
	}
	lck_spin_unlock(&vm_allocation_sites_lock);
	assert(subidx < allocation->subtotalscount);
	if (subidx >= allocation->subtotalscount) {
		return;
	}

	total = &allocation->subtotals[subidx];
	other = vm_allocation_sites[subtag];
	assert(other);

	if (delta < 0) {
		assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
		assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
	}
	OSAddAtomic64(delta, &other->mapped);
	OSAddAtomic64(delta, &total->total);
}
const char *
kern_allocation_get_name(kern_allocation_name_t allocation)
{
	return KA_NAME(allocation);
}
kern_allocation_name_t
kern_allocation_name_allocate(const char * name, uint16_t subtotalscount)
{
	uint16_t namelen;

	namelen = (uint16_t)strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);

	kern_allocation_name_t allocation;
	allocation = kheap_alloc(KHEAP_DATA_BUFFERS,
	    KA_SIZE(namelen, subtotalscount), Z_WAITOK);
	bzero(allocation, KA_SIZE(namelen, subtotalscount));

	allocation->refcount = 1;
	allocation->subtotalscount = subtotalscount;
	allocation->flags = (uint16_t)(namelen << VM_TAG_NAME_LEN_SHIFT);
	strlcpy(KA_NAME(allocation), name, namelen + 1);

	return allocation;
}
void
kern_allocation_name_release(kern_allocation_name_t allocation)
{
	assert(allocation->refcount > 0);
	if (1 == OSAddAtomic16(-1, &allocation->refcount)) {
		kheap_free(KHEAP_DATA_BUFFERS, allocation,
		    KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
	}
}
vm_tag_t
kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
{
	return vm_tag_alloc(allocation);
}
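
/*
 * Illustrative sketch (not part of the original source): the typical
 * lifetime of a named allocation counter built from the
 * kern_allocation_name_* calls above. The name string and the sizes used
 * here are placeholders for illustration only.
 */
#if 0
static void
example_named_accounting(void)
{
	kern_allocation_name_t name;

	/* one subtotal slot, so kern_allocation_update_subtotal() could be used too */
	name = kern_allocation_name_allocate("com.example.pool", 1);

	kern_allocation_update_size(name, 4096);        /* account 4 KiB in   */
	kern_allocation_update_size(name, -4096);       /* ...and back out    */

	kern_allocation_name_release(name);             /* drops the refcount */
}
#endif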
#if !VM_TAG_ACTIVE_UPDATE
static void
vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
{
	if (!object->wired_page_count) {
		return;
	}
	if (object != kernel_object) {
		assert(object->wire_tag < num_info);
		info[object->wire_tag].size += ptoa_64(object->wired_page_count);
	}
}

typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
    unsigned int num_info, vm_object_t object);

static void
vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
    vm_page_iterate_proc proc, purgeable_q_t queue,
    int group)
{
	vm_object_t object;

	for (object = (vm_object_t) queue_first(&queue->objq[group]);
	    !queue_end(&queue->objq[group], (queue_entry_t) object);
	    object = (vm_object_t) queue_next(&object->objq)) {
		proc(info, num_info, object);
	}
}

static void
vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
    vm_page_iterate_proc proc)
{
	vm_object_t     object;

	lck_spin_lock_grp(&vm_objects_wired_lock, &vm_page_lck_grp_bucket);
	queue_iterate(&vm_objects_wired,
	    object,
	    vm_object_t,
	    wired_objq)
	{
		proc(info, num_info, object);
	}
	lck_spin_unlock(&vm_objects_wired_lock);
}
#endif /* ! VM_TAG_ACTIVE_UPDATE */
static void
process_account(mach_memory_info_t * info, unsigned int num_info,
    uint64_t zones_collectable_bytes, boolean_t iterated)
{
	size_t                 namelen;
	unsigned int           idx, count, nextinfo;
	vm_allocation_site_t * site;

	lck_spin_lock(&vm_allocation_sites_lock);

	for (idx = 0; idx <= vm_allocation_tag_highest; idx++) {
		site = vm_allocation_sites[idx];
		if (!site) {
			continue;
		}
		info[idx].mapped = site->mapped;
		info[idx].tag = site->tag;
		if (!iterated) {
			info[idx].size = site->total;
#if DEBUG || DEVELOPMENT
			info[idx].peak = site->peak;
#endif /* DEBUG || DEVELOPMENT */
		} else {
			if (!site->subtotalscount && (site->total != info[idx].size)) {
				printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
				info[idx].size = site->total;
			}
		}
		info[idx].flags |= VM_KERN_SITE_WIRED;
		if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC) {
			info[idx].site = idx;
			info[idx].flags |= VM_KERN_SITE_TAG;
			if (VM_KERN_MEMORY_ZONE == idx) {
				info[idx].flags |= VM_KERN_SITE_HIDE;
				info[idx].flags &= ~VM_KERN_SITE_WIRED;
				info[idx].collectable_bytes = zones_collectable_bytes;
			}
		} else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT)))) {
			info[idx].flags |= VM_KERN_SITE_NAMED;
			if (namelen > sizeof(info[idx].name)) {
				namelen = sizeof(info[idx].name);
			}
			strncpy(&info[idx].name[0], KA_NAME(site), namelen);
		} else if (VM_TAG_KMOD & site->flags) {
			info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
			info[idx].flags |= VM_KERN_SITE_KMOD;
		} else {
			info[idx].site = VM_KERNEL_UNSLIDE(site);
			info[idx].flags |= VM_KERN_SITE_KERNEL;
		}
	}

	nextinfo = (vm_allocation_tag_highest + 1);
	count = nextinfo;
	if (count >= num_info) {
		count = num_info;
	}

	for (idx = 0; idx < count; idx++) {
		site = vm_allocation_sites[idx];
		if (!site) {
			continue;
		}
#if VM_MAX_TAG_ZONES
		vm_allocation_zone_total_t * zone;
		uint32_t                     zidx;
		vm_size_t                    elem_size;

		if (vm_allocation_zone_totals
		    && (zone = vm_allocation_zone_totals[idx])
		    && (nextinfo < num_info)) {
			for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
				if (!zone[zidx].vazt_peak) {
					continue;
				}
				info[nextinfo] = info[idx];
				info[nextinfo].zone = (uint16_t)zone_index_from_tag_index(zidx, &elem_size);
				info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
				info[nextinfo].flags |= VM_KERN_SITE_ZONE;
				info[nextinfo].size = zone[zidx].vazt_total;
				info[nextinfo].peak = zone[zidx].vazt_peak;
				info[nextinfo].mapped = 0;
				nextinfo++;
			}
		}
#endif /* VM_MAX_TAG_ZONES */
		if (site->subtotalscount) {
			uint64_t mapped, mapcost, take;
			uint32_t sub;
			vm_tag_t alloctag;

			info[idx].size = site->total;
			mapped = info[idx].size;
			info[idx].mapped = mapped;
			mapcost = 0;
			for (sub = 0; sub < site->subtotalscount; sub++) {
				alloctag = site->subtotals[sub].tag;
				assert(alloctag < num_info);
				if (info[alloctag].name[0]) {
					continue;
				}
				take = site->subtotals[sub].total;
				if (take > info[alloctag].size) {
					take = info[alloctag].size;
				}
				if (take > mapped) {
					take = mapped;
				}
				info[alloctag].mapped -= take;
				info[alloctag].size -= take;
				mapped -= take;
				mapcost += take;
			}
			info[idx].size = mapcost;
		}
	}
	lck_spin_unlock(&vm_allocation_sites_lock);
}
uint32_t
vm_page_diagnose_estimate(void)
{
	vm_allocation_site_t * site;
	uint32_t               count = zone_view_count;
	uint32_t               idx;

	lck_spin_lock(&vm_allocation_sites_lock);
	for (idx = 0; idx < VM_MAX_TAG_VALUE; idx++) {
		site = vm_allocation_sites[idx];
		if (!site) {
			continue;
		}
		count++;
#if VM_MAX_TAG_ZONES
		if (vm_allocation_zone_totals) {
			vm_allocation_zone_total_t * zone;
			zone = vm_allocation_zone_totals[idx];
			if (!zone) {
				continue;
			}
			for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
				count += (zone[zidx].vazt_peak != 0);
			}
		}
#endif /* VM_MAX_TAG_ZONES */
	}
	lck_spin_unlock(&vm_allocation_sites_lock);

	/* some slop for new tags created */
	count += 8;
	count += VM_KERN_COUNTER_COUNT;

	return count;
}
static void
vm_page_diagnose_zone_stats(mach_memory_info_t *info, zone_stats_t zstats,
    bool percpu)
{
	zpercpu_foreach(zs, zstats) {
		info->size += zs->zs_mem_allocated - zs->zs_mem_freed;
	}
	if (percpu) {
		info->size *= zpercpu_count();
	}
	info->flags |= VM_KERN_SITE_NAMED | VM_KERN_SITE_ZONE_VIEW;
}

static void
vm_page_diagnose_zone(mach_memory_info_t *info, zone_t z)
{
	vm_page_diagnose_zone_stats(info, z->z_stats, z->z_percpu);
	snprintf(info->name, sizeof(info->name),
	    "%s%s[raw]", zone_heap_name(z), z->z_name);
}
static uint32_t
vm_page_diagnose_heap(mach_memory_info_t *info, kalloc_heap_t kheap)
{
	struct kheap_zones *zones = kheap->kh_zones;
	uint32_t i = 0;

	for (; i < zones->max_k_zone; i++) {
		vm_page_diagnose_zone(info + i, zones->k_zone[i]);
	}

	for (kalloc_heap_t kh = zones->views; kh; kh = kh->kh_next, i++) {
		vm_page_diagnose_zone_stats(info + i, kh->kh_stats, false);
		snprintf(info[i].name, sizeof(info[i].name),
		    "%skalloc[%s]", kheap->kh_name, kh->kh_name);
	}

	return i;
}
kern_return_t
vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
{
	uint64_t             wired_size;
	uint64_t             wired_managed_size;
	uint64_t             wired_reserved_size;
	boolean_t            iterate;
	mach_memory_info_t * counts;
	uint32_t             i;

	bzero(info, num_info * sizeof(mach_memory_info_t));

	if (!vm_page_wire_count_initial) {
		return KERN_ABORTED;
	}

#if !XNU_TARGET_OS_OSX
	wired_size = ptoa_64(vm_page_wire_count);
	wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
#else /* !XNU_TARGET_OS_OSX */
	wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
	wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
#endif /* !XNU_TARGET_OS_OSX */
	wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);

	wired_size += booter_size;

	assert(num_info >= VM_KERN_COUNTER_COUNT);
	num_info -= VM_KERN_COUNTER_COUNT;
	counts = &info[num_info];

#define SET_COUNT(xcount, xsize, xflags)                        \
	counts[xcount].tag    = VM_MAX_TAG_VALUE + xcount;      \
	counts[xcount].site   = (xcount);                       \
	counts[xcount].size   = (xsize);                        \
	counts[xcount].mapped = (xsize);                        \
	counts[xcount].flags  = VM_KERN_SITE_COUNTER | xflags;

	SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
	SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
	SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
	SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
	SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_WIRED_STATIC_KERNELCACHE, ptoa_64(vm_page_kernelcache_count), 0);

#define SET_MAP(xcount, xsize, xfree, xlargest)                 \
	counts[xcount].site    = (xcount);                      \
	counts[xcount].size    = (xsize);                       \
	counts[xcount].mapped  = (xsize);                       \
	counts[xcount].free    = (xfree);                       \
	counts[xcount].largest = (xlargest);                    \
	counts[xcount].flags   = VM_KERN_SITE_COUNTER;

	vm_map_size_t map_size, map_free, map_largest;

	vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);

	zone_map_sizes(&map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);

	vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);

	assert(num_info >= zone_view_count);
	num_info -= zone_view_count;
	counts = &info[num_info];
	i = 0;

	i += vm_page_diagnose_heap(counts + i, KHEAP_DEFAULT);
	if (KHEAP_DATA_BUFFERS->kh_heap_id == KHEAP_ID_DATA_BUFFERS) {
		i += vm_page_diagnose_heap(counts + i, KHEAP_DATA_BUFFERS);
	}
	if (KHEAP_KEXT->kh_heap_id == KHEAP_ID_KEXT) {
		i += vm_page_diagnose_heap(counts + i, KHEAP_KEXT);
	}
	assert(i <= zone_view_count);

	zone_index_foreach(zidx) {
		zone_t z = &zone_array[zidx];
		zone_view_t zv = z->z_views;

		if (zv == NULL) {
			continue;
		}

		if (z->kalloc_heap == KHEAP_ID_NONE) {
			vm_page_diagnose_zone(counts + i, z);
			i++;
			assert(i <= zone_view_count);
		}

		for (; zv; zv = zv->zv_next) {
			vm_page_diagnose_zone_stats(counts + i, zv->zv_stats,
			    z->z_percpu);
			snprintf(counts[i].name, sizeof(counts[i].name), "%s%s[%s]",
			    zone_heap_name(z), z->z_name, zv->zv_name);
			i++;
			assert(i <= zone_view_count);
		}
	}

	iterate = !VM_TAG_ACTIVE_UPDATE;
	if (iterate) {
		enum { kMaxKernelDepth = 1 };
		vm_map_t             maps[kMaxKernelDepth];
		vm_map_entry_t       entries[kMaxKernelDepth];
		vm_map_t             map;
		vm_map_entry_t       entry;
		vm_object_offset_t   offset;
		vm_page_t            page;
		int                  stackIdx, count;

#if !VM_TAG_ACTIVE_UPDATE
		vm_page_iterate_objects(info, num_info, &vm_page_count_object);
#endif /* ! VM_TAG_ACTIVE_UPDATE */

		map = kernel_map;
		stackIdx = 0;
		while (map) {
			vm_map_lock(map);
			for (entry = map->hdr.links.next; map; entry = entry->links.next) {
				if (entry->is_sub_map) {
					assert(stackIdx < kMaxKernelDepth);
					maps[stackIdx] = map;
					entries[stackIdx] = entry;
					stackIdx++;
					map = VME_SUBMAP(entry);
					entry = NULL;
					break;
				}
				if (VME_OBJECT(entry) == kernel_object) {
					count = 0;
					vm_object_lock(VME_OBJECT(entry));
					for (offset = entry->links.start; offset < entry->links.end; offset += page_size) {
						page = vm_page_lookup(VME_OBJECT(entry), offset);
						if (page && VM_PAGE_WIRED(page)) {
							count++;
						}
					}
					vm_object_unlock(VME_OBJECT(entry));

					if (count) {
						assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
						assert(VME_ALIAS(entry) < num_info);
						info[VME_ALIAS(entry)].size += ptoa_64(count);
					}
				}
				while (map && (entry == vm_map_last_entry(map))) {
					vm_map_unlock(map);
					if (!stackIdx) {
						map = NULL;
					} else {
						--stackIdx;
						map = maps[stackIdx];
						entry = entries[stackIdx];
					}
				}
			}
		}
	}

	process_account(info, num_info, zones_collectable_bytes, iterate);

	return KERN_SUCCESS;
}
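
/*
 * Illustrative sketch (not part of the original source): how a caller might
 * pair vm_page_diagnose_estimate() with vm_page_diagnose(), sizing the info
 * array from the estimate. The allocation details below are placeholders for
 * illustration only.
 */
#if 0
static kern_return_t
example_collect_memory_info(uint64_t zones_collectable_bytes)
{
	unsigned int        num_info;
	mach_memory_info_t *info;
	kern_return_t       kr;

	num_info = vm_page_diagnose_estimate();
	info = kheap_alloc(KHEAP_DATA_BUFFERS,
	    num_info * sizeof(*info), Z_WAITOK | Z_ZERO);
	if (info == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	kr = vm_page_diagnose(info, num_info, zones_collectable_bytes);

	/* ... consume info[] ... */

	kheap_free(KHEAP_DATA_BUFFERS, info, num_info * sizeof(*info));
	return kr;
}
#endif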
#if DEBUG || DEVELOPMENT

kern_return_t
vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
{
	kern_return_t  ret;
	vm_size_t      zsize;
	vm_map_t       map;
	vm_map_entry_t entry;

	zsize = zone_element_info((void *) addr, tag);
	if (zsize) {
		*zone_size = *size = zsize;
		return KERN_SUCCESS;
	}

	*zone_size = 0;
	ret = KERN_INVALID_ADDRESS;
	for (map = kernel_map; map;) {
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, addr, &entry)) {
			break;
		}
		if (entry->is_sub_map) {
			if (map != kernel_map) {
				break;
			}
			map = VME_SUBMAP(entry);
			continue;
		}
		if (entry->vme_start != addr) {
			break;
		}
		*tag = (vm_tag_t)VME_ALIAS(entry);
		*size = (entry->vme_end - addr);
		ret = KERN_SUCCESS;
		break;
	}
	if (map != kernel_map) {
		vm_map_unlock(map);
	}
	vm_map_unlock(kernel_map);

	return ret;
}

#endif /* DEBUG || DEVELOPMENT */
uint32_t
vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
{
	vm_allocation_site_t * site;
	uint32_t               kmodId;

	kmodId = 0;
	lck_spin_lock(&vm_allocation_sites_lock);
	if ((site = vm_allocation_sites[tag])) {
		if (VM_TAG_KMOD & site->flags) {
			kmodId = OSKextGetKmodIDForSite(site, name, namelen);
		}
	}
	lck_spin_unlock(&vm_allocation_sites_lock);

	return kmodId;
}
#if CONFIG_SECLUDED_MEMORY
/*
 * Note that there's no locking around other accesses to vm_page_secluded_target.
 * That should be OK, since these are the only places where it can be changed after
 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
 * but will eventually get the correct value. This brief mismatch is OK as pageout
 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
 * over time.
 */
unsigned int vm_page_secluded_suppress_cnt = 0;
unsigned int vm_page_secluded_save_target;

LCK_GRP_DECLARE(secluded_suppress_slock_grp, "secluded_suppress_slock");
LCK_SPIN_DECLARE(secluded_suppress_slock, &secluded_suppress_slock_grp);

void
start_secluded_suppression(task_t task)
{
	if (task->task_suppressed_secluded) {
		return;
	}
	lck_spin_lock(&secluded_suppress_slock);
	if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
		task->task_suppressed_secluded = TRUE;
		vm_page_secluded_save_target = vm_page_secluded_target;
		vm_page_secluded_target = 0;
		VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
	}
	lck_spin_unlock(&secluded_suppress_slock);
}

void
stop_secluded_suppression(task_t task)
{
	lck_spin_lock(&secluded_suppress_slock);
	if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
		task->task_suppressed_secluded = FALSE;
		vm_page_secluded_target = vm_page_secluded_save_target;
		VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
	}
	lck_spin_unlock(&secluded_suppress_slock);
}

#endif /* CONFIG_SECLUDED_MEMORY */
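
/*
 * Illustrative sketch (not part of the original source): the intended
 * bracketed use of the suppression calls above -- suppress the secluded
 * target for the duration of some task-level operation, then restore it.
 * The helper name is hypothetical.
 */
#if 0
static void
example_suppress_secluded_for(task_t task)
{
	start_secluded_suppression(task);

	/* ... operation that should not be starved by the secluded pool ... */

	stop_secluded_suppression(task);
}
#endif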
/*
 * Move the list of retired pages on the vm_page_queue_retired to
 * their final resting place on retired_pages_object.
 */
void
vm_retire_boot_pages(void)
{
#if defined(__arm64__)
	vm_page_t p;

	vm_object_lock(retired_pages_object);
	while (!vm_page_queue_empty(&vm_page_queue_retired)) {
		vm_page_queue_remove_first(&vm_page_queue_retired, p, vmp_pageq);
		assert(p != NULL);
		vm_page_lock_queues();
		p->vmp_q_state = VM_PAGE_IS_WIRED;
		p->vmp_wire_count++;
		vm_page_unlock_queues();
		vm_page_insert_wired(p, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(p)), VM_KERN_MEMORY_RETIRED);
		vm_object_unlock(retired_pages_object);
		pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(p));
		vm_object_lock(retired_pages_object);
	}
	vm_object_unlock(retired_pages_object);
#endif /* defined(__arm64__) */
}
/*
 * Returns the current number of retired pages, used for sysctl.
 */
uint32_t
vm_retired_pages_count(void)
{
	return retired_pages_object->resident_page_count;
}